# Read the 2019 Food Access Research Atlas CSV straight from the project's
# GitHub repository, then preview the first six rows.
url_data <- "https://raw.githubusercontent.com/DATA301-Group-2/Project/refs/heads/main/FoodAccessResearchAtlasData2019/FoodAccessResearchAtlasData2019.csv"
data <- read.csv(file = url_data)
head(data, n = 6L)
## CensusTract State County Urban Pop2010 OHU2010 GroupQuartersFlag
## 1 1001020100 Alabama Autauga County 1 1912 693 0
## 2 1001020200 Alabama Autauga County 1 2170 743 0
## 3 1001020300 Alabama Autauga County 1 3373 1256 0
## 4 1001020400 Alabama Autauga County 1 4386 1722 0
## 5 1001020500 Alabama Autauga County 1 10766 4082 0
## 6 1001020600 Alabama Autauga County 1 3668 1311 0
## NUMGQTRS PCTGQTRS LILATracts_1And10 LILATracts_halfAnd10 LILATracts_1And20
## 1 0 0.00 0 0 0
## 2 181 8.34 1 1 1
## 3 0 0.00 0 0 0
## 4 0 0.00 0 0 0
## 5 181 1.68 0 0 0
## 6 0 0.00 1 1 1
## LILATracts_Vehicle HUNVFlag LowIncomeTracts PovertyRate MedianFamilyIncome
## 1 0 0 0 11.3 81250
## 2 0 0 1 17.9 49000
## 3 0 0 0 15.0 62609
## 4 0 0 0 2.8 70607
## 5 0 1 0 15.2 96334
## 6 0 0 1 21.6 69521
## LA1and10 LAhalfand10 LA1and20 LATracts_half LATracts1 LATracts10 LATracts20
## 1 1 1 1 1 1 0 0
## 2 1 1 1 1 1 0 0
## 3 1 1 1 1 1 0 0
## 4 1 1 1 1 1 0 0
## 5 1 1 1 1 1 0 0
## 6 1 1 1 1 1 0 0
## LATractsVehicle_20 LAPOP1_10 LAPOP05_10 LAPOP1_20 LALOWI1_10 LALOWI05_10
## 1 0 1896 1912 1896 461 467
## 2 0 1261 2170 1261 604 962
## 3 0 1552 2857 1552 478 971
## 4 0 1363 3651 1363 343 893
## 5 1 2643 7778 2643 586 1719
## 6 0 3438 3668 3438 1585 1674
## LALOWI1_20 lapophalf lapophalfshare lalowihalf lalowihalfshare lakidshalf
## 1 461 1912 100.00 467 24.42 507
## 2 604 2170 100.00 962 44.34 606
## 3 478 2857 84.70 971 28.79 771
## 4 343 3651 83.24 893 20.36 847
## 5 586 7778 72.25 1719 15.97 2309
## 6 1585 3668 100.00 1674 45.63 1008
## lakidshalfshare laseniorshalf laseniorshalfshare lawhitehalf lawhitehalfshare
## 1 26.52 221 11.56 1622 84.83
## 2 27.93 214 9.86 888 40.92
## 3 22.86 358 10.60 2177 64.53
## 4 19.30 767 17.48 3395 77.41
## 5 21.45 840 7.80 6299 58.51
## 6 27.48 411 11.21 2751 75.00
## lablackhalf lablackhalfshare laasianhalf laasianhalfshare lanhopihalf
## 1 217 11.35 14 0.73 0
## 2 1217 56.08 5 0.23 0
## 3 554 16.43 10 0.30 1
## 4 170 3.88 15 0.34 3
## 5 1001 9.29 209 1.94 5
## 6 740 20.17 9 0.25 1
## lanhopihalfshare laaianhalf laaianhalfshare laomultirhalf laomultirhalfshare
## 1 0.00 14 0.73 45 2.35
## 2 0.00 5 0.23 55 2.53
## 3 0.03 10 0.30 105 3.10
## 4 0.06 8 0.18 60 1.38
## 5 0.05 38 0.35 227 2.11
## 6 0.03 10 0.27 157 4.28
## lahisphalf lahisphalfshare lahunvhalf lahunvhalfshare lasnaphalf
## 1 44 2.30 5 0.79 92
## 2 75 3.46 93 12.47 161
## 3 78 2.30 39 3.09 139
## 4 61 1.40 19 1.13 84
## 5 277 2.57 164 4.01 235
## 6 176 4.80 73 5.54 220
## lasnaphalfshare lapop1 lapop1share lalowi1 lalowi1share lakids1 lakids1share
## 1 13.33 1896 99.19 461 24.11 504 26.33
## 2 21.70 1261 58.11 604 27.83 406 18.69
## 3 11.05 1552 46.00 478 14.18 416 12.34
## 4 4.88 1363 31.09 343 7.83 346 7.89
## 5 5.76 2643 24.55 586 5.45 715 6.64
## 6 16.82 3438 93.72 1585 43.21 955 26.03
## laseniors1 laseniors1share lawhite1 lawhite1share lablack1 lablack1share
## 1 219 11.44 1611 84.26 214 11.17
## 2 127 5.83 357 16.43 854 39.36
## 3 201 5.96 1242 36.81 255 7.56
## 4 237 5.39 1233 28.12 81 1.85
## 5 362 3.36 2168 20.14 343 3.19
## 6 375 10.22 2539 69.22 726 19.80
## laasian1 laasian1share lanhopi1 lanhopi1share laaian1 laaian1share laomultir1
## 1 14 0.72 0 0.00 14 0.73 44
## 2 4 0.18 0 0.00 4 0.20 42
## 3 8 0.24 0 0.00 2 0.06 45
## 4 7 0.16 2 0.05 4 0.08 37
## 5 47 0.44 1 0.01 14 0.13 70
## 6 9 0.25 1 0.03 9 0.26 153
## laomultir1share lahisp1 lahisp1share lahunv1 lahunv1share lasnap1
## 1 2.31 43 2.27 5 0.79 92
## 2 1.93 33 1.52 67 9.00 96
## 3 1.33 36 1.08 0 0.00 74
## 4 0.84 30 0.68 8 0.46 30
## 5 0.65 86 0.80 55 1.35 83
## 6 4.16 168 4.59 72 5.47 206
## lasnap1share lapop10 lapop10share lalowi10 lalowi10share lakids10
## 1 13.22 NULL NULL NULL NULL NULL
## 2 12.95 NULL NULL NULL NULL NULL
## 3 5.87 NULL NULL NULL NULL NULL
## 4 1.76 NULL NULL NULL NULL NULL
## 5 2.04 NULL NULL NULL NULL NULL
## 6 15.70 NULL NULL NULL NULL NULL
## lakids10share laseniors10 laseniors10share lawhite10 lawhite10share lablack10
## 1 NULL NULL NULL NULL NULL NULL
## 2 NULL NULL NULL NULL NULL NULL
## 3 NULL NULL NULL NULL NULL NULL
## 4 NULL NULL NULL NULL NULL NULL
## 5 NULL NULL NULL NULL NULL NULL
## 6 NULL NULL NULL NULL NULL NULL
## lablack10share laasian10 laasian10share lanhopi10 lanhopi10share laaian10
## 1 NULL NULL NULL NULL NULL NULL
## 2 NULL NULL NULL NULL NULL NULL
## 3 NULL NULL NULL NULL NULL NULL
## 4 NULL NULL NULL NULL NULL NULL
## 5 NULL NULL NULL NULL NULL NULL
## 6 NULL NULL NULL NULL NULL NULL
## laaian10share laomultir10 laomultir10share lahisp10 lahisp10share lahunv10
## 1 NULL NULL NULL NULL NULL NULL
## 2 NULL NULL NULL NULL NULL NULL
## 3 NULL NULL NULL NULL NULL NULL
## 4 NULL NULL NULL NULL NULL NULL
## 5 NULL NULL NULL NULL NULL NULL
## 6 NULL NULL NULL NULL NULL NULL
## lahunv10share lasnap10 lasnap10share lapop20 lapop20share lalowi20
## 1 NULL NULL NULL NULL NULL NULL
## 2 NULL NULL NULL NULL NULL NULL
## 3 NULL NULL NULL NULL NULL NULL
## 4 NULL NULL NULL NULL NULL NULL
## 5 NULL NULL NULL NULL NULL NULL
## 6 NULL NULL NULL NULL NULL NULL
## lalowi20share lakids20 lakids20share laseniors20 laseniors20share lawhite20
## 1 NULL NULL NULL NULL NULL NULL
## 2 NULL NULL NULL NULL NULL NULL
## 3 NULL NULL NULL NULL NULL NULL
## 4 NULL NULL NULL NULL NULL NULL
## 5 NULL NULL NULL NULL NULL NULL
## 6 NULL NULL NULL NULL NULL NULL
## lawhite20share lablack20 lablack20share laasian20 laasian20share lanhopi20
## 1 NULL NULL NULL NULL NULL NULL
## 2 NULL NULL NULL NULL NULL NULL
## 3 NULL NULL NULL NULL NULL NULL
## 4 NULL NULL NULL NULL NULL NULL
## 5 NULL NULL NULL NULL NULL NULL
## 6 NULL NULL NULL NULL NULL NULL
## lanhopi20share laaian20 laaian20share laomultir20 laomultir20share lahisp20
## 1 NULL NULL NULL NULL NULL NULL
## 2 NULL NULL NULL NULL NULL NULL
## 3 NULL NULL NULL NULL NULL NULL
## 4 NULL NULL NULL NULL NULL NULL
## 5 NULL NULL NULL NULL NULL NULL
## 6 NULL NULL NULL NULL NULL NULL
## lahisp20share lahunv20 lahunv20share lasnap20 lasnap20share TractLOWI
## 1 NULL NULL NULL NULL NULL 455
## 2 NULL NULL NULL NULL NULL 802
## 3 NULL NULL NULL NULL NULL 1306
## 4 NULL NULL NULL NULL NULL 922
## 5 NULL NULL NULL NULL NULL 2242
## 6 NULL NULL NULL NULL NULL 1659
## TractKids TractSeniors TractWhite TractBlack TractAsian TractNHOPI TractAIAN
## 1 507 221 1622 217 14 0 14
## 2 606 214 888 1217 5 0 5
## 3 894 439 2576 647 17 5 11
## 4 1015 904 4086 193 18 4 11
## 5 3162 1126 8666 1437 296 9 48
## 6 1008 411 2751 740 9 1 10
## TractOMultir TractHispanic TractHUNV TractSNAP
## 1 45 44 6 102
## 2 55 75 89 156
## 3 117 87 99 172
## 4 74 85 21 98
## 5 310 355 230 339
## 6 157 176 71 224
# Inspect the storage type and leading values of every column
glimpse(x = data)
## Rows: 72,531
## Columns: 147
## $ CensusTract <dbl> 1001020100, 1001020200, 1001020300, 1001020400, 1…
## $ State <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alab…
## $ County <chr> "Autauga County", "Autauga County", "Autauga Coun…
## $ Urban <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Pop2010 <int> 1912, 2170, 3373, 4386, 10766, 3668, 2891, 3081, …
## $ OHU2010 <int> 693, 743, 1256, 1722, 4082, 1311, 1188, 1074, 369…
## $ GroupQuartersFlag <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ NUMGQTRS <chr> "0", "181", "0", "0", "181", "0", "36", "0", "0",…
## $ PCTGQTRS <chr> "0.00", "8.34", "0.00", "0.00", "1.68", "0.00", "…
## $ LILATracts_1And10 <int> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0…
## $ LILATracts_halfAnd10 <int> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0…
## $ LILATracts_1And20 <int> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ LILATracts_Vehicle <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ HUNVFlag <int> 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0…
## $ LowIncomeTracts <int> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0…
## $ PovertyRate <chr> "11.3", "17.9", "15.0", "2.8", "15.2", "21.6", "3…
## $ MedianFamilyIncome <chr> "81250", "49000", "62609", "70607", "96334", "695…
## $ LA1and10 <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1…
## $ LAhalfand10 <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1…
## $ LA1and20 <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ LATracts_half <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ LATracts1 <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ LATracts10 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0…
## $ LATracts20 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ LATractsVehicle_20 <int> 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0…
## $ LAPOP1_10 <chr> "1896", "1261", "1552", "1363", "2643", "3438", "…
## $ LAPOP05_10 <chr> "1912", "2170", "2857", "3651", "7778", "3668", "…
## $ LAPOP1_20 <chr> "1896", "1261", "1552", "1363", "2643", "3438", "…
## $ LALOWI1_10 <chr> "461", "604", "478", "343", "586", "1585", "742",…
## $ LALOWI05_10 <chr> "467", "962", "971", "893", "1719", "1674", "1307…
## $ LALOWI1_20 <chr> "461", "604", "478", "343", "586", "1585", "742",…
## $ lapophalf <chr> "1912", "2170", "2857", "3651", "7778", "3668", "…
## $ lapophalfshare <chr> "100.00", "100.00", "84.70", "83.24", "72.25", "1…
## $ lalowihalf <chr> "467", "962", "971", "893", "1719", "1674", "1307…
## $ lalowihalfshare <chr> "24.42", "44.34", "28.79", "20.36", "15.97", "45.…
## $ lakidshalf <chr> "507", "606", "771", "847", "2309", "1008", "557"…
## $ lakidshalfshare <chr> "26.52", "27.93", "22.86", "19.30", "21.45", "27.…
## $ laseniorshalf <chr> "221", "214", "358", "767", "840", "411", "277", …
## $ laseniorshalfshare <chr> "11.56", "9.86", "10.60", "17.48", "7.80", "11.21…
## $ lawhitehalf <chr> "1622", "888", "2177", "3395", "6299", "2751", "1…
## $ lawhitehalfshare <chr> "84.83", "40.92", "64.53", "77.41", "58.51", "75.…
## $ lablackhalf <chr> "217", "1217", "554", "170", "1001", "740", "337"…
## $ lablackhalfshare <chr> "11.35", "56.08", "16.43", "3.88", "9.29", "20.17…
## $ laasianhalf <chr> "14", "5", "10", "15", "209", "9", "10", "16", "6…
## $ laasianhalfshare <chr> "0.73", "0.23", "0.30", "0.34", "1.94", "0.25", "…
## $ lanhopihalf <chr> "0", "0", "1", "3", "5", "1", "3", "0", "7", "3",…
## $ lanhopihalfshare <chr> "0.00", "0.00", "0.03", "0.06", "0.05", "0.03", "…
## $ laaianhalf <chr> "14", "5", "10", "8", "38", "10", "9", "27", "49"…
## $ laaianhalfshare <chr> "0.73", "0.23", "0.30", "0.18", "0.35", "0.27", "…
## $ laomultirhalf <chr> "45", "55", "105", "60", "227", "157", "79", "70"…
## $ laomultirhalfshare <chr> "2.35", "2.53", "3.10", "1.38", "2.11", "4.28", "…
## $ lahisphalf <chr> "44", "75", "78", "61", "277", "176", "82", "57",…
## $ lahisphalfshare <chr> "2.30", "3.46", "2.30", "1.40", "2.57", "4.80", "…
## $ lahunvhalf <chr> "5", "93", "39", "19", "164", "73", "23", "74", "…
## $ lahunvhalfshare <chr> "0.79", "12.47", "3.09", "1.13", "4.01", "5.54", …
## $ lasnaphalf <chr> "92", "161", "139", "84", "235", "220", "263", "1…
## $ lasnaphalfshare <chr> "13.33", "21.70", "11.05", "4.88", "5.76", "16.82…
## $ lapop1 <chr> "1896", "1261", "1552", "1363", "2643", "3438", "…
## $ lapop1share <chr> "99.19", "58.11", "46.00", "31.09", "24.55", "93.…
## $ lalowi1 <chr> "461", "604", "478", "343", "586", "1585", "742",…
## $ lalowi1share <chr> "24.11", "27.83", "14.18", "7.83", "5.45", "43.21…
## $ lakids1 <chr> "504", "406", "416", "346", "715", "955", "298", …
## $ lakids1share <chr> "26.33", "18.69", "12.34", "7.89", "6.64", "26.03…
## $ laseniors1 <chr> "219", "127", "201", "237", "362", "375", "109", …
## $ laseniors1share <chr> "11.44", "5.83", "5.96", "5.39", "3.36", "10.22",…
## $ lawhite1 <chr> "1611", "357", "1242", "1233", "2168", "2539", "1…
## $ lawhite1share <chr> "84.26", "16.43", "36.81", "28.12", "20.14", "69.…
## $ lablack1 <chr> "214", "854", "255", "81", "343", "726", "158", "…
## $ lablack1share <chr> "11.17", "39.36", "7.56", "1.85", "3.19", "19.80"…
## $ laasian1 <chr> "14", "4", "8", "7", "47", "9", "4", "16", "43", …
## $ laasian1share <chr> "0.72", "0.18", "0.24", "0.16", "0.44", "0.25", "…
## $ lanhopi1 <chr> "0", "0", "0", "2", "1", "1", "2", "0", "7", "3",…
## $ lanhopi1share <chr> "0.00", "0.00", "0.00", "0.05", "0.01", "0.03", "…
## $ laaian1 <chr> "14", "4", "2", "4", "14", "9", "4", "27", "47", …
## $ laaian1share <chr> "0.73", "0.20", "0.06", "0.08", "0.13", "0.26", "…
## $ laomultir1 <chr> "44", "42", "45", "37", "70", "153", "58", "70", …
## $ laomultir1share <chr> "2.31", "1.93", "1.33", "0.84", "0.65", "4.16", "…
## $ lahisp1 <chr> "43", "33", "36", "30", "86", "168", "56", "57", …
## $ lahisp1share <chr> "2.27", "1.52", "1.08", "0.68", "0.80", "4.59", "…
## $ lahunv1 <chr> "5", "67", "0", "8", "55", "72", "12", "74", "179…
## $ lahunv1share <chr> "0.79", "9.00", "0.00", "0.46", "1.35", "5.47", "…
## $ lasnap1 <chr> "92", "96", "74", "30", "83", "206", "140", "150"…
## $ lasnap1share <chr> "13.22", "12.95", "5.87", "1.76", "2.04", "15.70"…
## $ lapop10 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lapop10share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lalowi10 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lalowi10share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lakids10 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lakids10share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laseniors10 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laseniors10share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lawhite10 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lawhite10share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lablack10 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lablack10share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laasian10 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laasian10share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lanhopi10 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lanhopi10share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laaian10 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laaian10share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laomultir10 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laomultir10share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lahisp10 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lahisp10share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lahunv10 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lahunv10share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lasnap10 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lasnap10share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lapop20 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lapop20share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lalowi20 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lalowi20share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lakids20 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lakids20share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laseniors20 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laseniors20share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lawhite20 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lawhite20share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lablack20 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lablack20share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laasian20 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laasian20share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lanhopi20 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lanhopi20share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laaian20 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laaian20share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laomultir20 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laomultir20share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lahisp20 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lahisp20share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lahunv20 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lahunv20share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lasnap20 <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lasnap20share <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ TractLOWI <chr> "455", "802", "1306", "922", "2242", "1659", "217…
## $ TractKids <chr> "507", "606", "894", "1015", "3162", "1008", "686…
## $ TractSeniors <chr> "221", "214", "439", "904", "1126", "411", "360",…
## $ TractWhite <chr> "1622", "888", "2576", "4086", "8666", "2751", "2…
## $ TractBlack <chr> "217", "1217", "647", "193", "1437", "740", "435"…
## $ TractAsian <chr> "14", "5", "17", "18", "296", "9", "13", "16", "6…
## $ TractNHOPI <chr> "0", "0", "5", "4", "9", "1", "3", "0", "7", "3",…
## $ TractAIAN <chr> "14", "5", "11", "11", "48", "10", "11", "27", "4…
## $ TractOMultir <chr> "45", "55", "117", "74", "310", "157", "96", "70"…
## $ TractHispanic <chr> "44", "75", "87", "85", "355", "176", "98", "57",…
## $ TractHUNV <chr> "6", "89", "99", "21", "230", "71", "34", "68", "…
## $ TractSNAP <chr> "102", "156", "172", "98", "339", "224", "390", "…
Many of the data types in the dataset are incorrect. The binary variables for flagging low-income and low-access tracts are correctly stored as integers, and the State and County categorical variables are correctly stored as characters. All of the population count and population share variables, however, are stored as characters, as are NUMGQTRS, PCTGQTRS, PovertyRate, and MedianFamilyIncome. The count variables should be stored as integers and the share and rate variables as doubles.
# Per-column summary: quartiles for numeric columns, length/class for strings
summary(object = data)
## CensusTract State County Urban
## Min. :1.001e+09 Length:72531 Length:72531 Min. :0.0000
## 1st Qu.:1.213e+10 Class :character Class :character 1st Qu.:1.0000
## Median :2.713e+10 Mode :character Mode :character Median :1.0000
## Mean :2.783e+10 Mean :0.7606
## 3rd Qu.:4.104e+10 3rd Qu.:1.0000
## Max. :5.605e+10 Max. :1.0000
## Pop2010 OHU2010 GroupQuartersFlag NUMGQTRS
## Min. : 1 Min. : 0 Min. :0.000000 Length:72531
## 1st Qu.: 2899 1st Qu.: 1108 1st Qu.:0.000000 Class :character
## Median : 4011 Median : 1525 Median :0.000000 Mode :character
## Mean : 4257 Mean : 1609 Mean :0.007114
## 3rd Qu.: 5330 3rd Qu.: 2021 3rd Qu.:0.000000
## Max. :37452 Max. :16043 Max. :1.000000
## PCTGQTRS LILATracts_1And10 LILATracts_halfAnd10 LILATracts_1And20
## Length:72531 Min. :0.0000 Min. :0.0000 Min. :0.0000
## Class :character 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Mode :character Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1281 Mean :0.2791 Mean :0.1122
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## LILATracts_Vehicle HUNVFlag LowIncomeTracts PovertyRate
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Length:72531
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 Class :character
## Median :0.0000 Median :0.0000 Median :0.0000 Mode :character
## Mean :0.1396 Mean :0.2108 Mean :0.4176
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## MedianFamilyIncome LA1and10 LAhalfand10 LA1and20
## Length:72531 Min. :0.0000 Min. :0.0000 Min. :0.0000
## Class :character 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Mode :character Median :0.0000 Median :1.0000 Median :0.0000
## Mean :0.3798 Mean :0.6828 Mean :0.3407
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## LATracts_half LATracts1 LATracts10 LATracts20
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.000000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.000000
## Median :1.0000 Median :0.0000 Median :0.00000 Median :0.000000
## Mean :0.6388 Mean :0.3359 Mean :0.04393 Mean :0.004784
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.000000
## LATractsVehicle_20 LAPOP1_10 LAPOP05_10 LAPOP1_20
## Min. :0.0000 Length:72531 Length:72531 Length:72531
## 1st Qu.:0.0000 Class :character Class :character Class :character
## Median :0.0000 Mode :character Mode :character Mode :character
## Mean :0.2147
## 3rd Qu.:0.0000
## Max. :1.0000
## LALOWI1_10 LALOWI05_10 LALOWI1_20 lapophalf
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lapophalfshare lalowihalf lalowihalfshare lakidshalf
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lakidshalfshare laseniorshalf laseniorshalfshare lawhitehalf
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lawhitehalfshare lablackhalf lablackhalfshare laasianhalf
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## laasianhalfshare lanhopihalf lanhopihalfshare laaianhalf
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## laaianhalfshare laomultirhalf laomultirhalfshare lahisphalf
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lahisphalfshare lahunvhalf lahunvhalfshare lasnaphalf
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lasnaphalfshare lapop1 lapop1share lalowi1
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lalowi1share lakids1 lakids1share laseniors1
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## laseniors1share lawhite1 lawhite1share lablack1
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lablack1share laasian1 laasian1share lanhopi1
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lanhopi1share laaian1 laaian1share laomultir1
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## laomultir1share lahisp1 lahisp1share lahunv1
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lahunv1share lasnap1 lasnap1share lapop10
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lapop10share lalowi10 lalowi10share lakids10
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lakids10share laseniors10 laseniors10share lawhite10
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lawhite10share lablack10 lablack10share laasian10
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## laasian10share lanhopi10 lanhopi10share laaian10
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## laaian10share laomultir10 laomultir10share lahisp10
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lahisp10share lahunv10 lahunv10share lasnap10
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lasnap10share lapop20 lapop20share lalowi20
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lalowi20share lakids20 lakids20share laseniors20
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## laseniors20share lawhite20 lawhite20share lablack20
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lablack20share laasian20 laasian20share lanhopi20
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lanhopi20share laaian20 laaian20share laomultir20
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## laomultir20share lahisp20 lahisp20share lahunv20
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## lahunv20share lasnap20 lasnap20share TractLOWI
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## TractKids TractSeniors TractWhite TractBlack
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## TractAsian TractNHOPI TractAIAN TractOMultir
## Length:72531 Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## TractHispanic TractHUNV TractSNAP
## Length:72531 Length:72531 Length:72531
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
Because the binary variables are stored as integers and their minimums and maximums are 0 and 1, we can see that the only values these variables take are in fact 0 and 1. However, there appear to be a lot of missing values in the data set, so we need to check for NAs and other representations of missing data.
# Count the true NA entries in each column
colSums(x = is.na(data))
## CensusTract State County
## 0 0 0
## Urban Pop2010 OHU2010
## 0 0 0
## GroupQuartersFlag NUMGQTRS PCTGQTRS
## 0 0 0
## LILATracts_1And10 LILATracts_halfAnd10 LILATracts_1And20
## 0 0 0
## LILATracts_Vehicle HUNVFlag LowIncomeTracts
## 0 0 0
## PovertyRate MedianFamilyIncome LA1and10
## 0 0 0
## LAhalfand10 LA1and20 LATracts_half
## 0 0 0
## LATracts1 LATracts10 LATracts20
## 0 0 0
## LATractsVehicle_20 LAPOP1_10 LAPOP05_10
## 0 0 0
## LAPOP1_20 LALOWI1_10 LALOWI05_10
## 0 0 0
## LALOWI1_20 lapophalf lapophalfshare
## 0 0 0
## lalowihalf lalowihalfshare lakidshalf
## 0 0 0
## lakidshalfshare laseniorshalf laseniorshalfshare
## 0 0 0
## lawhitehalf lawhitehalfshare lablackhalf
## 0 0 0
## lablackhalfshare laasianhalf laasianhalfshare
## 0 0 0
## lanhopihalf lanhopihalfshare laaianhalf
## 0 0 0
## laaianhalfshare laomultirhalf laomultirhalfshare
## 0 0 0
## lahisphalf lahisphalfshare lahunvhalf
## 0 0 0
## lahunvhalfshare lasnaphalf lasnaphalfshare
## 0 0 0
## lapop1 lapop1share lalowi1
## 0 0 0
## lalowi1share lakids1 lakids1share
## 0 0 0
## laseniors1 laseniors1share lawhite1
## 0 0 0
## lawhite1share lablack1 lablack1share
## 0 0 0
## laasian1 laasian1share lanhopi1
## 0 0 0
## lanhopi1share laaian1 laaian1share
## 0 0 0
## laomultir1 laomultir1share lahisp1
## 0 0 0
## lahisp1share lahunv1 lahunv1share
## 0 0 0
## lasnap1 lasnap1share lapop10
## 0 0 0
## lapop10share lalowi10 lalowi10share
## 0 0 0
## lakids10 lakids10share laseniors10
## 0 0 0
## laseniors10share lawhite10 lawhite10share
## 0 0 0
## lablack10 lablack10share laasian10
## 0 0 0
## laasian10share lanhopi10 lanhopi10share
## 0 0 0
## laaian10 laaian10share laomultir10
## 0 0 0
## laomultir10share lahisp10 lahisp10share
## 0 0 0
## lahunv10 lahunv10share lasnap10
## 0 0 0
## lasnap10share lapop20 lapop20share
## 0 0 0
## lalowi20 lalowi20share lakids20
## 0 0 0
## lakids20share laseniors20 laseniors20share
## 0 0 0
## lawhite20 lawhite20share lablack20
## 0 0 0
## lablack20share laasian20 laasian20share
## 0 0 0
## lanhopi20 lanhopi20share laaian20
## 0 0 0
## laaian20share laomultir20 laomultir20share
## 0 0 0
## lahisp20 lahisp20share lahunv20
## 0 0 0
## lahunv20share lasnap20 lasnap20share
## 0 0 0
## TractLOWI TractKids TractSeniors
## 0 0 0
## TractWhite TractBlack TractAsian
## 0 0 0
## TractNHOPI TractAIAN TractOMultir
## 0 0 0
## TractHispanic TractHUNV TractSNAP
## 0 0 0
None of the variables have NAs, so it appears that the missing data are all stored as a string, “NULL”.
# Names of the columns that were read in as character vectors.
# vapply() always yields a logical vector (sapply() would return a list for a
# zero-column data frame, which cannot be used as a subscript).
string_cols <- names(data)[vapply(data, is.character, logical(1))]
# Count the literal "NULL" placeholder strings in each character column.
# na.rm = TRUE keeps a genuine NA in a column from turning its count into NA.
null_counts <- vapply(
  data[string_cols],
  function(x) sum(x == "NULL", na.rm = TRUE),
  integer(1)
)
print(null_counts)
## State County NUMGQTRS PCTGQTRS
## 0 0 25 25
## PovertyRate MedianFamilyIncome LAPOP1_10 LAPOP05_10
## 3 748 29957 14540
## LAPOP1_20 LALOWI1_10 LALOWI05_10 LALOWI1_20
## 35914 29957 14540 35914
## lapophalf lapophalfshare lalowihalf lalowihalfshare
## 4568 4568 4568 4568
## lakidshalf lakidshalfshare laseniorshalf laseniorshalfshare
## 4568 4568 4568 4568
## lawhitehalf lawhitehalfshare lablackhalf lablackhalfshare
## 4568 4568 4568 4568
## laasianhalf laasianhalfshare lanhopihalf lanhopihalfshare
## 4568 4568 4568 4568
## laaianhalf laaianhalfshare laomultirhalf laomultirhalfshare
## 4568 4568 4568 4568
## lahisphalf lahisphalfshare lahunvhalf lahunvhalfshare
## 4568 4568 4568 4562
## lasnaphalf lasnaphalfshare lapop1 lapop1share
## 4568 4562 19989 19989
## lalowi1 lalowi1share lakids1 lakids1share
## 19989 19989 19989 19989
## laseniors1 laseniors1share lawhite1 lawhite1share
## 19989 19989 19989 19989
## lablack1 lablack1share laasian1 laasian1share
## 19989 19989 19989 19989
## lanhopi1 lanhopi1share laaian1 laaian1share
## 19989 19989 19989 19989
## laomultir1 laomultir1share lahisp1 lahisp1share
## 19989 19989 19989 19989
## lahunv1 lahunv1share lasnap1 lasnap1share
## 19989 19966 19989 19966
## lapop10 lapop10share lalowi10 lalowi10share
## 64765 64765 64765 64765
## lakids10 lakids10share laseniors10 laseniors10share
## 64765 64765 64765 64765
## lawhite10 lawhite10share lablack10 lablack10share
## 64765 64765 64765 64765
## laasian10 laasian10share lanhopi10 lanhopi10share
## 64765 64765 64765 64765
## laaian10 laaian10share laomultir10 laomultir10share
## 64765 64765 64765 64765
## lahisp10 lahisp10share lahunv10 lahunv10share
## 64765 64765 64765 64666
## lasnap10 lasnap10share lapop20 lapop20share
## 64765 64666 71025 71025
## lalowi20 lalowi20share lakids20 lakids20share
## 71025 71025 71025 71025
## laseniors20 laseniors20share lawhite20 lawhite20share
## 71025 71025 71025 71025
## lablack20 lablack20share laasian20 laasian20share
## 71025 71025 71025 71025
## lanhopi20 lanhopi20share laaian20 laaian20share
## 71025 71025 71025 71025
## laomultir20 laomultir20share lahisp20 lahisp20share
## 71025 71025 71025 71025
## lahunv20 lahunv20share lasnap20 lasnap20share
## 71025 70920 71025 70920
## TractLOWI TractKids TractSeniors TractWhite
## 4 4 4 4
## TractBlack TractAsian TractNHOPI TractAIAN
## 4 4 4 4
## TractOMultir TractHispanic TractHUNV TractSNAP
## 4 4 4 4
Some of the variables have very large proportions of null values: up to 71,025 out of 72,531, which leaves only 1,506 data points. The variables with the most null values are the 20-mile variables, which we are not using at this point in our project. Luckily, our response variable LA1and10 has no missing values. Since we are splitting the data into urban and rural tracts, it is better to use LAPOP1_10 when we want raw population counts, rather than lapop1 and lapop10 separately, because it covers both distances in a single variable and has far fewer missing values (29,957) than lapop10 (64,765). Because we will already have split the data using the Urban variable, we will know that the population refers to the 1-mile distance for urban tracts and the 10-mile distance for rural tracts. Many of the variables at the 10-mile scale are still missing 64,765 values, leaving only 7,766 data points for analysis. First we must convert the string “NULL” values to true NAs and convert all of the variables to the appropriate data type.
# display the character columns collected so far
print(string_cols)
## [1] "State" "County" "NUMGQTRS"
## [4] "PCTGQTRS" "PovertyRate" "MedianFamilyIncome"
## [7] "LAPOP1_10" "LAPOP05_10" "LAPOP1_20"
## [10] "LALOWI1_10" "LALOWI05_10" "LALOWI1_20"
## [13] "lapophalf" "lapophalfshare" "lalowihalf"
## [16] "lalowihalfshare" "lakidshalf" "lakidshalfshare"
## [19] "laseniorshalf" "laseniorshalfshare" "lawhitehalf"
## [22] "lawhitehalfshare" "lablackhalf" "lablackhalfshare"
## [25] "laasianhalf" "laasianhalfshare" "lanhopihalf"
## [28] "lanhopihalfshare" "laaianhalf" "laaianhalfshare"
## [31] "laomultirhalf" "laomultirhalfshare" "lahisphalf"
## [34] "lahisphalfshare" "lahunvhalf" "lahunvhalfshare"
## [37] "lasnaphalf" "lasnaphalfshare" "lapop1"
## [40] "lapop1share" "lalowi1" "lalowi1share"
## [43] "lakids1" "lakids1share" "laseniors1"
## [46] "laseniors1share" "lawhite1" "lawhite1share"
## [49] "lablack1" "lablack1share" "laasian1"
## [52] "laasian1share" "lanhopi1" "lanhopi1share"
## [55] "laaian1" "laaian1share" "laomultir1"
## [58] "laomultir1share" "lahisp1" "lahisp1share"
## [61] "lahunv1" "lahunv1share" "lasnap1"
## [64] "lasnap1share" "lapop10" "lapop10share"
## [67] "lalowi10" "lalowi10share" "lakids10"
## [70] "lakids10share" "laseniors10" "laseniors10share"
## [73] "lawhite10" "lawhite10share" "lablack10"
## [76] "lablack10share" "laasian10" "laasian10share"
## [79] "lanhopi10" "lanhopi10share" "laaian10"
## [82] "laaian10share" "laomultir10" "laomultir10share"
## [85] "lahisp10" "lahisp10share" "lahunv10"
## [88] "lahunv10share" "lasnap10" "lasnap10share"
## [91] "lapop20" "lapop20share" "lalowi20"
## [94] "lalowi20share" "lakids20" "lakids20share"
## [97] "laseniors20" "laseniors20share" "lawhite20"
## [100] "lawhite20share" "lablack20" "lablack20share"
## [103] "laasian20" "laasian20share" "lanhopi20"
## [106] "lanhopi20share" "laaian20" "laaian20share"
## [109] "laomultir20" "laomultir20share" "lahisp20"
## [112] "lahisp20share" "lahunv20" "lahunv20share"
## [115] "lasnap20" "lasnap20share" "TractLOWI"
## [118] "TractKids" "TractSeniors" "TractWhite"
## [121] "TractBlack" "TractAsian" "TractNHOPI"
## [124] "TractAIAN" "TractOMultir" "TractHispanic"
## [127] "TractHUNV" "TractSNAP"
# keep every character column except the two label columns; these are the
# ones that need to be converted to numeric
numeric_cols <- string_cols[!(string_cols %in% c("State", "County"))]
print(numeric_cols)
## [1] "NUMGQTRS" "PCTGQTRS" "PovertyRate"
## [4] "MedianFamilyIncome" "LAPOP1_10" "LAPOP05_10"
## [7] "LAPOP1_20" "LALOWI1_10" "LALOWI05_10"
## [10] "LALOWI1_20" "lapophalf" "lapophalfshare"
## [13] "lalowihalf" "lalowihalfshare" "lakidshalf"
## [16] "lakidshalfshare" "laseniorshalf" "laseniorshalfshare"
## [19] "lawhitehalf" "lawhitehalfshare" "lablackhalf"
## [22] "lablackhalfshare" "laasianhalf" "laasianhalfshare"
## [25] "lanhopihalf" "lanhopihalfshare" "laaianhalf"
## [28] "laaianhalfshare" "laomultirhalf" "laomultirhalfshare"
## [31] "lahisphalf" "lahisphalfshare" "lahunvhalf"
## [34] "lahunvhalfshare" "lasnaphalf" "lasnaphalfshare"
## [37] "lapop1" "lapop1share" "lalowi1"
## [40] "lalowi1share" "lakids1" "lakids1share"
## [43] "laseniors1" "laseniors1share" "lawhite1"
## [46] "lawhite1share" "lablack1" "lablack1share"
## [49] "laasian1" "laasian1share" "lanhopi1"
## [52] "lanhopi1share" "laaian1" "laaian1share"
## [55] "laomultir1" "laomultir1share" "lahisp1"
## [58] "lahisp1share" "lahunv1" "lahunv1share"
## [61] "lasnap1" "lasnap1share" "lapop10"
## [64] "lapop10share" "lalowi10" "lalowi10share"
## [67] "lakids10" "lakids10share" "laseniors10"
## [70] "laseniors10share" "lawhite10" "lawhite10share"
## [73] "lablack10" "lablack10share" "laasian10"
## [76] "laasian10share" "lanhopi10" "lanhopi10share"
## [79] "laaian10" "laaian10share" "laomultir10"
## [82] "laomultir10share" "lahisp10" "lahisp10share"
## [85] "lahunv10" "lahunv10share" "lasnap10"
## [88] "lasnap10share" "lapop20" "lapop20share"
## [91] "lalowi20" "lalowi20share" "lakids20"
## [94] "lakids20share" "laseniors20" "laseniors20share"
## [97] "lawhite20" "lawhite20share" "lablack20"
## [100] "lablack20share" "laasian20" "laasian20share"
## [103] "lanhopi20" "lanhopi20share" "laaian20"
## [106] "laaian20share" "laomultir20" "laomultir20share"
## [109] "lahisp20" "lahisp20share" "lahunv20"
## [112] "lahunv20share" "lasnap20" "lasnap20share"
## [115] "TractLOWI" "TractKids" "TractSeniors"
## [118] "TractWhite" "TractBlack" "TractAsian"
## [121] "TractNHOPI" "TractAIAN" "TractOMultir"
## [124] "TractHispanic" "TractHUNV" "TractSNAP"
# drop the to-be-converted columns so only the true string variables remain
string_cols <- string_cols[!(string_cols %in% numeric_cols)]
print(string_cols)
## [1] "State" "County"
# add remaining columns to numeric_cols for consistency
# NOTE: the data frame is called `data`; a bare `df` resolves to the
# stats::df function, whose names() is NULL, so names(df) added no columns
remaining_cols <- setdiff(names(data), c(string_cols, numeric_cols))
numeric_cols <- c(numeric_cols, remaining_cols)
# convert data into numeric, simultaneously convert "NULL" to NA
data[numeric_cols] <- lapply(data[numeric_cols], function(x) {
  # %in% never yields NA, so this stays safe if a column already holds NAs
  x[x %in% "NULL"] <- NA
  as.numeric(x)
})
# check data types
glimpse(data)
## Rows: 72,531
## Columns: 147
## $ CensusTract <dbl> 1001020100, 1001020200, 1001020300, 1001020400, 1…
## $ State <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alab…
## $ County <chr> "Autauga County", "Autauga County", "Autauga Coun…
## $ Urban <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Pop2010 <int> 1912, 2170, 3373, 4386, 10766, 3668, 2891, 3081, …
## $ OHU2010 <int> 693, 743, 1256, 1722, 4082, 1311, 1188, 1074, 369…
## $ GroupQuartersFlag <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ NUMGQTRS <dbl> 0, 181, 0, 0, 181, 0, 36, 0, 0, 14, 10, 33, 31, 6…
## $ PCTGQTRS <dbl> 0.00, 8.34, 0.00, 0.00, 1.68, 0.00, 1.25, 0.00, 0…
## $ LILATracts_1And10 <int> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0…
## $ LILATracts_halfAnd10 <int> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0…
## $ LILATracts_1And20 <int> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ LILATracts_Vehicle <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ HUNVFlag <int> 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0…
## $ LowIncomeTracts <int> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0…
## $ PovertyRate <dbl> 11.3, 17.9, 15.0, 2.8, 15.2, 21.6, 30.5, 8.9, 13.…
## $ MedianFamilyIncome <dbl> 81250, 49000, 62609, 70607, 96334, 69521, 39875, …
## $ LA1and10 <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1…
## $ LAhalfand10 <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1…
## $ LA1and20 <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ LATracts_half <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ LATracts1 <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ LATracts10 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0…
## $ LATracts20 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ LATractsVehicle_20 <int> 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0…
## $ LAPOP1_10 <dbl> 1896, 1261, 1552, 1363, 2643, 3438, 1231, 0, 74, …
## $ LAPOP05_10 <dbl> 1912, 2170, 2857, 3651, 7778, 3668, 2287, 0, 74, …
## $ LAPOP1_20 <dbl> 1896, 1261, 1552, 1363, 2643, 3438, 1231, NA, NA,…
## $ LALOWI1_10 <dbl> 461, 604, 478, 343, 586, 1585, 742, 0, 26, 25, 90…
## $ LALOWI05_10 <dbl> 467, 962, 971, 893, 1719, 1674, 1307, 0, 26, 25, …
## $ LALOWI1_20 <dbl> 461, 604, 478, 343, 586, 1585, 742, NA, NA, NA, N…
## $ lapophalf <dbl> 1912, 2170, 2857, 3651, 7778, 3668, 2287, 3081, 1…
## $ lapophalfshare <dbl> 100.00, 100.00, 84.70, 83.24, 72.25, 100.00, 79.1…
## $ lalowihalf <dbl> 467, 962, 971, 893, 1719, 1674, 1307, 576, 2783, …
## $ lalowihalfshare <dbl> 24.42, 44.34, 28.79, 20.36, 15.97, 45.63, 45.19, …
## $ lakidshalf <dbl> 507, 606, 771, 847, 2309, 1008, 557, 815, 2883, 1…
## $ lakidshalfshare <dbl> 26.52, 27.93, 22.86, 19.30, 21.45, 27.48, 19.25, …
## $ laseniorshalf <dbl> 221, 214, 358, 767, 840, 411, 277, 341, 1050, 616…
## $ laseniorshalfshare <dbl> 11.56, 9.86, 10.60, 17.48, 7.80, 11.21, 9.57, 11.…
## $ lawhitehalf <dbl> 1622, 888, 2177, 3395, 6299, 2751, 1849, 2690, 86…
## $ lawhitehalfshare <dbl> 84.83, 40.92, 64.53, 77.41, 58.51, 75.00, 63.97, …
## $ lablackhalf <dbl> 217, 1217, 554, 170, 1001, 740, 337, 278, 1382, 6…
## $ lablackhalfshare <dbl> 11.35, 56.08, 16.43, 3.88, 9.29, 20.17, 11.67, 9.…
## $ laasianhalf <dbl> 14, 5, 10, 15, 209, 9, 10, 16, 61, 20, 3, 2, 6, 1…
## $ laasianhalfshare <dbl> 0.73, 0.23, 0.30, 0.34, 1.94, 0.25, 0.35, 0.52, 0…
## $ lanhopihalf <dbl> 0, 0, 1, 3, 5, 1, 3, 0, 7, 3, 0, 0, 0, 0, 1, 0, 1…
## $ lanhopihalfshare <dbl> 0.00, 0.00, 0.03, 0.06, 0.05, 0.03, 0.10, 0.00, 0…
## $ laaianhalf <dbl> 14, 5, 10, 8, 38, 10, 9, 27, 49, 14, 18, 14, 64, …
## $ laaianhalfshare <dbl> 0.73, 0.23, 0.30, 0.18, 0.35, 0.27, 0.30, 0.88, 0…
## $ laomultirhalf <dbl> 45, 55, 105, 60, 227, 157, 79, 70, 186, 116, 47, …
## $ laomultirhalfshare <dbl> 2.35, 2.53, 3.10, 1.38, 2.11, 4.28, 2.73, 2.27, 1…
## $ lahisphalf <dbl> 44, 75, 78, 61, 277, 176, 82, 57, 158, 80, 39, 39…
## $ lahisphalfshare <dbl> 2.30, 3.46, 2.30, 1.40, 2.57, 4.80, 2.84, 1.85, 1…
## $ lahunvhalf <dbl> 5, 93, 39, 19, 164, 73, 23, 74, 193, 82, 9, 271, …
## $ lahunvhalfshare <dbl> 0.79, 12.47, 3.09, 1.13, 4.01, 5.54, 1.91, 6.91, …
## $ lasnaphalf <dbl> 92, 161, 139, 84, 235, 220, 263, 150, 314, 298, 1…
## $ lasnaphalfshare <dbl> 13.33, 21.70, 11.05, 4.88, 5.76, 16.82, 22.12, 13…
## $ lapop1 <dbl> 1896, 1261, 1552, 1363, 2643, 3438, 1231, 3081, 9…
## $ lapop1share <dbl> 99.19, 58.11, 46.00, 31.09, 24.55, 93.72, 42.58, …
## $ lalowi1 <dbl> 461, 604, 478, 343, 586, 1585, 742, 576, 2547, 13…
## $ lalowi1share <dbl> 24.11, 27.83, 14.18, 7.83, 5.45, 43.21, 25.67, 18…
## $ lakids1 <dbl> 504, 406, 416, 346, 715, 955, 298, 815, 2573, 144…
## $ lakids1share <dbl> 26.33, 18.69, 12.34, 7.89, 6.64, 26.03, 10.31, 26…
## $ laseniors1 <dbl> 219, 127, 201, 237, 362, 375, 109, 341, 983, 599,…
## $ laseniors1share <dbl> 11.44, 5.83, 5.96, 5.39, 3.36, 10.22, 3.78, 11.07…
## $ lawhite1 <dbl> 1611, 357, 1242, 1233, 2168, 2539, 1005, 2690, 77…
## $ lawhite1share <dbl> 84.26, 16.43, 36.81, 28.12, 20.14, 69.22, 34.77, …
## $ lablack1 <dbl> 214, 854, 255, 81, 343, 726, 158, 278, 1297, 676,…
## $ lablack1share <dbl> 11.17, 39.36, 7.56, 1.85, 3.19, 19.80, 5.47, 9.02…
## $ laasian1 <dbl> 14, 4, 8, 7, 47, 9, 4, 16, 43, 19, 3, 2, 6, 11, 3…
## $ laasian1share <dbl> 0.72, 0.18, 0.24, 0.16, 0.44, 0.25, 0.13, 0.52, 0…
## $ lanhopi1 <dbl> 0, 0, 0, 2, 1, 1, 2, 0, 7, 3, 0, 0, 0, 0, 1, 0, 0…
## $ lanhopi1share <dbl> 0.00, 0.00, 0.00, 0.05, 0.01, 0.03, 0.08, 0.00, 0…
## $ laaian1 <dbl> 14, 4, 2, 4, 14, 9, 4, 27, 47, 14, 18, 14, 64, 73…
## $ laaian1share <dbl> 0.73, 0.20, 0.06, 0.08, 0.13, 0.26, 0.14, 0.88, 0…
## $ laomultir1 <dbl> 44, 42, 45, 37, 70, 153, 58, 70, 171, 111, 47, 53…
## $ laomultir1share <dbl> 2.31, 1.93, 1.33, 0.84, 0.65, 4.16, 2.00, 2.27, 1…
## $ lahisp1 <dbl> 43, 33, 36, 30, 86, 168, 56, 57, 134, 79, 39, 39,…
## $ lahisp1share <dbl> 2.27, 1.52, 1.08, 0.68, 0.80, 4.59, 1.93, 1.85, 1…
## $ lahunv1 <dbl> 5, 67, 0, 8, 55, 72, 12, 74, 179, 82, 9, 271, 85,…
## $ lahunv1share <dbl> 0.79, 9.00, 0.00, 0.46, 1.35, 5.47, 1.01, 6.91, 4…
## $ lasnap1 <dbl> 92, 96, 74, 30, 83, 206, 140, 150, 285, 289, 155,…
## $ lasnap1share <dbl> 13.22, 12.95, 5.87, 1.76, 2.04, 15.70, 11.82, 13.…
## $ lapop10 <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 74, 67, 2338, 2640…
## $ lapop10share <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.71, 1.17, 80.…
## $ lalowi10 <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 26, 25, 902, 1354,…
## $ lalowi10share <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.24, 0.45, 31.…
## $ lakids10 <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 11, 10, 604, 574, …
## $ lakids10share <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.10, 0.18, 20.…
## $ laseniors10 <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 14, 14, 272, 407, …
## $ laseniors10share <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.14, 0.24, 9.4…
## $ lawhite10 <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 51, 62, 1786, 1052…
## $ lawhite10share <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.49, 1.09, 61.…
## $ lablack10 <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 21, 4, 489, 1540, …
## $ lablack10share <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.20, 0.07, 16.…
## $ laasian10 <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 3, 1, 2, 0, …
## $ laasian10share <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.00, 0.00, 0.1…
## $ lanhopi10 <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, …
## $ lanhopi10share <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, …
## $ laaian10 <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 16, 5, 25, 0…
## $ laaian10share <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.00, 0.00, 0.5…
## $ laomultir10 <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 2, 1, 44, 41, 17, …
## $ laomultir10share <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.02, 0.01, 1.5…
## $ lahisp10 <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 2, 1, 33, 31, 9, 0…
## $ lahisp10share <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.02, 0.01, 1.1…
## $ lahunv10 <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 2, 3, 7, 210, 27, …
## $ lahunv10share <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.06, 0.14, 0.6…
## $ lasnap10 <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 2, 4, 125, 182, 50…
## $ lasnap10share <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.06, 0.21, 11.…
## $ lapop20 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lapop20share <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lalowi20 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lalowi20share <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lakids20 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lakids20share <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ laseniors20 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ laseniors20share <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lawhite20 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lawhite20share <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lablack20 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lablack20share <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ laasian20 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ laasian20share <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lanhopi20 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lanhopi20share <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ laaian20 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ laaian20share <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ laomultir20 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ laomultir20share <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lahisp20 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lahisp20share <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lahunv20 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lahunv20share <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lasnap20 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lasnap20share <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ TractLOWI <dbl> 455, 802, 1306, 922, 2242, 1659, 2175, 527, 3103,…
## $ TractKids <dbl> 507, 606, 894, 1015, 3162, 1008, 686, 815, 2909, …
## $ TractSeniors <dbl> 221, 214, 439, 904, 1126, 411, 360, 341, 1060, 63…
## $ TractWhite <dbl> 1622, 888, 2576, 4086, 8666, 2751, 2333, 2690, 87…
## $ TractBlack <dbl> 217, 1217, 647, 193, 1437, 740, 435, 278, 1387, 6…
## $ TractAsian <dbl> 14, 5, 17, 18, 296, 9, 13, 16, 61, 20, 3, 2, 6, 1…
## $ TractNHOPI <dbl> 0, 0, 5, 4, 9, 1, 3, 0, 7, 3, 0, 0, 0, 0, 1, 0, 1…
## $ TractAIAN <dbl> 14, 5, 11, 11, 48, 10, 11, 27, 49, 14, 18, 14, 64…
## $ TractOMultir <dbl> 45, 55, 117, 74, 310, 157, 96, 70, 187, 124, 47, …
## $ TractHispanic <dbl> 44, 75, 87, 85, 355, 176, 98, 57, 159, 96, 39, 39…
## $ TractHUNV <dbl> 6, 89, 99, 21, 230, 71, 34, 68, 198, 97, 9, 269, …
## $ TractSNAP <dbl> 102, 156, 172, 98, 339, 224, 390, 143, 352, 340, …
Now all of the string columns that held numeric data have been converted to a numeric type. We can check the summary statistics again to look for outliers.
# print per-column summary statistics (including NA counts)
print(summary(data))
## CensusTract State County Urban
## Min. :1.001e+09 Length:72531 Length:72531 Min. :0.0000
## 1st Qu.:1.213e+10 Class :character Class :character 1st Qu.:1.0000
## Median :2.713e+10 Mode :character Mode :character Median :1.0000
## Mean :2.783e+10 Mean :0.7606
## 3rd Qu.:4.104e+10 3rd Qu.:1.0000
## Max. :5.605e+10 Max. :1.0000
##
## Pop2010 OHU2010 GroupQuartersFlag NUMGQTRS
## Min. : 1 Min. : 0 Min. :0.000000 Min. : 0.0
## 1st Qu.: 2899 1st Qu.: 1108 1st Qu.:0.000000 1st Qu.: 0.0
## Median : 4011 Median : 1525 Median :0.000000 Median : 7.0
## Mean : 4257 Mean : 1609 Mean :0.007114 Mean : 110.1
## 3rd Qu.: 5330 3rd Qu.: 2021 3rd Qu.:0.000000 3rd Qu.: 64.0
## Max. :37452 Max. :16043 Max. :1.000000 Max. :19496.0
## NA's :25
## PCTGQTRS LILATracts_1And10 LILATracts_halfAnd10 LILATracts_1And20
## Min. : 0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.: 0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median : 0.180 Median :0.0000 Median :0.0000 Median :0.0000
## Mean : 2.709 Mean :0.1281 Mean :0.2791 Mean :0.1122
## 3rd Qu.: 1.570 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :100.000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## NA's :25
## LILATracts_Vehicle HUNVFlag LowIncomeTracts PovertyRate
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 6.50
## Median :0.0000 Median :0.0000 Median :0.0000 Median : 12.00
## Mean :0.1396 Mean :0.2108 Mean :0.4176 Mean : 15.18
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.: 20.60
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :100.00
## NA's :3
## MedianFamilyIncome LA1and10 LAhalfand10 LA1and20
## Min. : 2499 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.: 51484 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median : 68821 Median :0.0000 Median :1.0000 Median :0.0000
## Mean : 77038 Mean :0.3798 Mean :0.6828 Mean :0.3407
## 3rd Qu.: 93868 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :250001 Max. :1.0000 Max. :1.0000 Max. :1.0000
## NA's :748
## LATracts_half LATracts1 LATracts10 LATracts20
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.000000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.000000
## Median :1.0000 Median :0.0000 Median :0.00000 Median :0.000000
## Mean :0.6388 Mean :0.3359 Mean :0.04393 Mean :0.004784
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.000000
##
## LATractsVehicle_20 LAPOP1_10 LAPOP05_10 LAPOP1_20
## Min. :0.0000 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.:0.0000 1st Qu.: 223 1st Qu.: 1083 1st Qu.: 271
## Median :0.0000 Median : 1024 Median : 2387 Median : 1186
## Mean :0.2147 Mean : 1612 Mean : 2657 Mean : 1750
## 3rd Qu.:0.0000 3rd Qu.: 2456 3rd Qu.: 3827 3rd Qu.: 2704
## Max. :1.0000 Max. :27227 Max. :32582 Max. :27227
## NA's :29957 NA's :14540 NA's :35914
## LALOWI1_10 LALOWI05_10 LALOWI1_20 lapophalf
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0
## 1st Qu.: 49.0 1st Qu.: 237.0 1st Qu.: 55.0 1st Qu.: 1756
## Median : 228.0 Median : 584.0 Median : 246.0 Median : 2926
## Mean : 442.4 Mean : 797.5 Mean : 468.9 Mean : 3166
## 3rd Qu.: 605.0 3rd Qu.:1128.5 3rd Qu.: 652.0 3rd Qu.: 4299
## Max. :9402.0 Max. :9874.0 Max. :9402.0 Max. :37452
## NA's :29957 NA's :14540 NA's :35914 NA's :4568
## lapophalfshare lalowihalf lalowihalfshare lakidshalf
## Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0.0
## 1st Qu.: 55.63 1st Qu.: 380.0 1st Qu.: 10.16 1st Qu.: 369.0
## Median : 83.50 Median : 769.0 Median : 19.99 Median : 673.0
## Mean : 73.43 Mean : 955.2 Mean : 23.09 Mean : 770.6
## 3rd Qu.: 99.80 3rd Qu.: 1329.0 3rd Qu.: 32.91 3rd Qu.:1048.0
## Max. :100.00 Max. :19602.0 Max. :100.00 Max. :9084.0
## NA's :4568 NA's :4568 NA's :4568 NA's :4568
## lakidshalfshare laseniorshalf laseniorshalfshare lawhitehalf
## Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0
## 1st Qu.:11.55 1st Qu.: 195.0 1st Qu.: 5.29 1st Qu.: 1071
## Median :18.64 Median : 373.0 Median : 9.77 Median : 2193
## Mean :17.39 Mean : 422.9 Mean : 10.33 Mean : 2428
## 3rd Qu.:23.41 3rd Qu.: 586.0 3rd Qu.: 14.08 3rd Qu.: 3476
## Max. :90.80 Max. :15261.0 Max. :100.00 Max. :28477
## NA's :4568 NA's :4568 NA's :4568 NA's :4568
## lawhitehalfshare lablackhalf lablackhalfshare laasianhalf
## Min. : 0.00 Min. : 0 Min. : 0.000 Min. : 0.0
## 1st Qu.: 31.43 1st Qu.: 22 1st Qu.: 0.560 1st Qu.: 10.0
## Median : 60.55 Median : 84 Median : 2.040 Median : 32.0
## Mean : 56.06 Mean : 361 Mean : 9.065 Mean : 115.1
## 3rd Qu.: 82.92 3rd Qu.: 355 3rd Qu.: 8.580 3rd Qu.: 104.0
## Max. :100.00 Max. :13594 Max. :100.000 Max. :6964.0
## NA's :4568 NA's :4568 NA's :4568 NA's :4568
## laasianhalfshare lanhopihalf lanhopihalfshare laaianhalf
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.00
## 1st Qu.: 0.280 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 4.00
## Median : 0.810 Median : 1.000 Median : 0.020 Median : 10.00
## Mean : 2.399 Mean : 4.783 Mean : 0.105 Mean : 31.48
## 3rd Qu.: 2.340 3rd Qu.: 3.000 3rd Qu.: 0.070 3rd Qu.: 22.00
## Max. :100.000 Max. :2786.000 Max. :85.880 Max. :8507.00
## NA's :4568 NA's :4568 NA's :4568 NA's :4568
## laaianhalfshare laomultirhalf laomultirhalfshare lahisphalf
## Min. : 0.000 Min. : 0.0 Min. : 0.00 Min. : 0.0
## 1st Qu.: 0.100 1st Qu.: 51.0 1st Qu.: 1.45 1st Qu.: 52.0
## Median : 0.250 Median : 112.0 Median : 2.78 Median : 139.0
## Mean : 0.807 Mean : 225.3 Mean : 4.99 Mean : 401.1
## 3rd Qu.: 0.520 3rd Qu.: 256.0 3rd Qu.: 5.93 3rd Qu.: 400.0
## Max. :100.000 Max. :6415.0 Max. :100.00 Max. :12805.0
## NA's :4568 NA's :4568 NA's :4568 NA's :4568
## lahisphalfshare lahunvhalf lahunvhalfshare lasnaphalf
## Min. : 0.000 Min. : 0.00 Min. : 0.000 Min. : 0.0
## 1st Qu.: 1.470 1st Qu.: 18.00 1st Qu.: 1.250 1st Qu.: 35.0
## Median : 3.460 Median : 45.00 Median : 2.910 Median : 95.0
## Mean : 8.632 Mean : 69.49 Mean : 4.685 Mean : 135.5
## 3rd Qu.: 9.370 3rd Qu.: 92.00 3rd Qu.: 5.800 3rd Qu.: 193.0
## Max. :100.000 Max. :1803.00 Max. :100.000 Max. :1582.0
## NA's :4568 NA's :4568 NA's :4562 NA's :4568
## lasnaphalfshare lapop1 lapop1share lalowi1
## Min. : 0.000 Min. : 0 Min. : 0.00 Min. : 0.0
## 1st Qu.: 2.460 1st Qu.: 665 1st Qu.: 17.89 1st Qu.: 132.0
## Median : 6.260 Median : 2003 Median : 55.22 Median : 464.0
## Mean : 8.934 Mean : 2338 Mean : 54.08 Mean : 669.7
## 3rd Qu.: 12.470 3rd Qu.: 3540 3rd Qu.: 94.07 3rd Qu.: 987.0
## Max. :100.000 Max. :37061 Max. :100.00 Max. :19397.0
## NA's :4562 NA's :19989 NA's :19989 NA's :19989
## lalowi1share lakids1 lakids1share laseniors1
## Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0.0
## 1st Qu.: 3.31 1st Qu.: 137.0 1st Qu.: 3.72 1st Qu.: 74.0
## Median : 11.71 Median : 456.0 Median :12.48 Median : 259.0
## Mean : 16.15 Mean : 569.7 Mean :12.81 Mean : 319.3
## 3rd Qu.: 25.26 3rd Qu.: 846.0 3rd Qu.:20.99 3rd Qu.: 484.0
## Max. :100.00 Max. :8907.0 Max. :90.80 Max. :10349.0
## NA's :19989 NA's :19989 NA's :19989 NA's :19989
## laseniors1share lawhite1 lawhite1share lablack1
## Min. : 0.000 Min. : 0 Min. : 0.00 Min. : 0.0
## 1st Qu.: 1.880 1st Qu.: 405 1st Qu.: 10.82 1st Qu.: 7.0
## Median : 6.550 Median : 1545 Median : 40.72 Median : 31.0
## Mean : 7.826 Mean : 1905 Mean : 44.09 Mean : 218.5
## 3rd Qu.: 12.230 3rd Qu.: 2969 3rd Qu.: 75.87 3rd Qu.: 161.0
## Max. :100.000 Max. :28165 Max. :100.00 Max. :12112.0
## NA's :19989 NA's :19989 NA's :19989 NA's :19989
## lablack1share laasian1 laasian1share lanhopi1
## Min. : 0.000 Min. : 0.00 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.190 1st Qu.: 4.00 1st Qu.: 0.100 1st Qu.: 0.000
## Median : 0.750 Median : 14.00 Median : 0.350 Median : 0.000
## Mean : 5.243 Mean : 57.91 Mean : 1.172 Mean : 2.883
## 3rd Qu.: 3.780 3rd Qu.: 46.00 3rd Qu.: 1.030 3rd Qu.: 1.000
## Max. :100.000 Max. :5809.00 Max. :100.000 Max. :2164.000
## NA's :19989 NA's :19989 NA's :19989 NA's :19989
## lanhopi1share laaian1 laaian1share laomultir1
## Min. : 0.000 Min. : 0.0 Min. : 0.000 Min. : 0.0
## 1st Qu.: 0.000 1st Qu.: 1.0 1st Qu.: 0.030 1st Qu.: 19.0
## Median : 0.000 Median : 6.0 Median : 0.140 Median : 56.0
## Mean : 0.064 Mean : 27.3 Mean : 0.725 Mean : 126.2
## 3rd Qu.: 0.030 3rd Qu.: 15.0 3rd Qu.: 0.350 3rd Qu.: 135.0
## Max. :85.880 Max. :8444.0 Max. :100.000 Max. :6146.0
## NA's :19989 NA's :19989 NA's :19989 NA's :19989
## laomultir1share lahisp1 lahisp1share lahunv1
## Min. : 0.000 Min. : 0.0 Min. : 0.00 Min. : 0.0
## 1st Qu.: 0.550 1st Qu.: 20.0 1st Qu.: 0.57 1st Qu.: 5.0
## Median : 1.410 Median : 63.0 Median : 1.56 Median : 22.0
## Mean : 2.793 Mean : 215.8 Mean : 4.63 Mean : 39.6
## 3rd Qu.: 3.080 3rd Qu.: 182.0 3rd Qu.: 4.17 3rd Qu.: 54.0
## Max. :100.000 Max. :11502.0 Max. :100.00 Max. :1794.0
## NA's :19989 NA's :19989 NA's :19989 NA's :19989
## lahunv1share lasnap1 lasnap1share lapop10
## Min. : 0.000 Min. : 0.00 Min. : 0.000 Min. : 0.0
## 1st Qu.: 0.320 1st Qu.: 12.00 1st Qu.: 0.790 1st Qu.: 49.0
## Median : 1.400 Median : 53.00 Median : 3.430 Median : 324.0
## Mean : 2.632 Mean : 92.52 Mean : 5.996 Mean : 662.5
## 3rd Qu.: 3.400 3rd Qu.: 134.00 3rd Qu.: 8.660 3rd Qu.: 981.0
## Max. :100.000 Max. :1582.00 Max. :100.000 Max. :8850.0
## NA's :19966 NA's :19989 NA's :19966 NA's :64765
## lapop10share lalowi10 lalowi10share lakids10
## Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0.0
## 1st Qu.: 1.30 1st Qu.: 14.0 1st Qu.: 0.38 1st Qu.: 9.0
## Median : 9.46 Median : 101.5 Median : 2.94 Median : 66.0
## Mean : 21.76 Mean : 249.9 Mean : 8.17 Mean : 150.8
## 3rd Qu.: 31.31 3rd Qu.: 339.0 3rd Qu.: 10.94 3rd Qu.: 216.8
## Max. :100.00 Max. :5202.0 Max. :100.00 Max. :2992.0
## NA's :64765 NA's :64765 NA's :64765 NA's :64765
## lakids10share laseniors10 laseniors10share lawhite10
## Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0
## 1st Qu.: 0.26 1st Qu.: 8.0 1st Qu.: 0.20 1st Qu.: 39
## Median : 1.94 Median : 52.0 Median : 1.50 Median : 258
## Mean : 4.90 Mean : 111.2 Mean : 3.81 Mean : 543
## 3rd Qu.: 6.77 3rd Qu.: 162.0 3rd Qu.: 5.30 3rd Qu.: 814
## Max. :40.00 Max. :2531.0 Max. :57.68 Max. :5485
## NA's :64765 NA's :64765 NA's :64765 NA's :64765
## lawhite10share lablack10 lablack10share laasian10
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.0
## 1st Qu.: 1.05 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.0
## Median : 7.54 Median : 1.00 Median : 0.03 Median : 0.0
## Mean : 17.99 Mean : 47.45 Mean : 1.43 Mean : 2.8
## 3rd Qu.: 25.52 3rd Qu.: 7.00 3rd Qu.: 0.20 3rd Qu.: 2.0
## Max. :100.00 Max. :4261.00 Max. :89.41 Max. :597.0
## NA's :64765 NA's :64765 NA's :64765 NA's :64765
## laasian10share lanhopi10 lanhopi10share laaian10
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 0.00 Median : 0.00 Median : 0.00 Median : 1.00
## Mean : 0.09 Mean : 0.52 Mean : 0.03 Mean : 38.43
## 3rd Qu.: 0.07 3rd Qu.: 0.00 3rd Qu.: 0.00 3rd Qu.: 6.00
## Max. :28.22 Max. :266.00 Max. :85.88 Max. :6947.00
## NA's :64765 NA's :64765 NA's :64765 NA's :64765
## laaian10share laomultir10 laomultir10share lahisp10
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.01 1st Qu.: 0.00
## Median : 0.02 Median : 6.00 Median : 0.18 Median : 5.00
## Mean : 1.25 Mean : 30.29 Mean : 0.97 Mean : 49.36
## 3rd Qu.: 0.18 3rd Qu.: 24.00 3rd Qu.: 0.76 3rd Qu.: 24.00
## Max. :99.34 Max. :1724.00 Max. :45.45 Max. :3953.00
## NA's :64765 NA's :64765 NA's :64765 NA's :64765
## lahisp10share lahunv10 lahunv10share lasnap10
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.01 1st Qu.: 2.00
## Median : 0.16 Median : 3.00 Median : 0.20 Median : 11.00
## Mean : 1.58 Mean : 12.55 Mean : 1.09 Mean : 31.92
## 3rd Qu.: 0.76 3rd Qu.: 13.00 3rd Qu.: 1.04 3rd Qu.: 38.00
## Max. :88.27 Max. :1514.00 Max. :67.71 Max. :995.00
## NA's :64765 NA's :64765 NA's :64666 NA's :64765
## lasnap10share lapop20 lapop20share lalowi20
## Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0.0
## 1st Qu.: 0.11 1st Qu.: 4.0 1st Qu.: 0.10 1st Qu.: 1.0
## Median : 0.85 Median : 67.0 Median : 2.58 Median : 21.0
## Mean : 2.67 Mean : 360.0 Mean : 15.37 Mean : 158.8
## 3rd Qu.: 3.01 3rd Qu.: 415.8 3rd Qu.: 16.21 3rd Qu.: 131.0
## Max. :84.53 Max. :8850.0 Max. :100.00 Max. :4319.0
## NA's :64666 NA's :71025 NA's :71025 NA's :71025
## lalowi20share lakids20 lakids20share laseniors20
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.03 1st Qu.: 0.00 1st Qu.: 0.01 1st Qu.: 0.00
## Median : 0.72 Median : 12.00 Median : 0.44 Median : 12.00
## Mean : 6.38 Mean : 85.04 Mean : 3.48 Mean : 63.28
## 3rd Qu.: 5.28 3rd Qu.: 82.75 3rd Qu.: 3.28 3rd Qu.: 73.00
## Max. :99.99 Max. :2992.00 Max. :37.03 Max. :2081.00
## NA's :71025 NA's :71025 NA's :71025 NA's :71025
## laseniors20share lawhite20 lawhite20share lablack20
## Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.01 1st Qu.: 2.0 1st Qu.: 0.07 1st Qu.: 0.00
## Median : 0.48 Median : 48.5 Median : 1.70 Median : 0.00
## Mean : 2.80 Mean : 247.8 Mean :11.33 Mean : 3.72
## 3rd Qu.: 2.65 3rd Qu.: 271.5 3rd Qu.:10.36 3rd Qu.: 1.00
## Max. :52.08 Max. :5485.0 Max. :99.15 Max. :1086.00
## NA's :71025 NA's :71025 NA's :71025 NA's :71025
## lablack20share laasian20 laasian20share lanhopi20
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 0.00 Median : 0.00 Median : 0.00 Median : 0.00
## Mean : 0.14 Mean : 2.21 Mean : 0.09 Mean : 0.46
## 3rd Qu.: 0.02 3rd Qu.: 1.00 3rd Qu.: 0.03 3rd Qu.: 0.00
## Max. :20.41 Max. :447.00 Max. :15.02 Max. :146.00
## NA's :71025 NA's :71025 NA's :71025 NA's :71025
## lanhopi20share laaian20 laaian20share laomultir20
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 0.00 Median : 0.00 Median : 0.00 Median : 2.00
## Mean : 0.07 Mean : 81.07 Mean : 2.74 Mean : 24.68
## 3rd Qu.: 0.00 3rd Qu.: 5.00 3rd Qu.: 0.21 3rd Qu.: 14.00
## Max. :85.88 Max. :6276.00 Max. :99.34 Max. :1654.00
## NA's :71025 NA's :71025 NA's :71025 NA's :71025
## laomultir20share lahisp20 lahisp20share lahunv20
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 0.06 Median : 1.00 Median : 0.05 Median : 0.00
## Mean : 1.00 Mean : 45.48 Mean : 1.81 Mean : 10.77
## 3rd Qu.: 0.56 3rd Qu.: 16.00 3rd Qu.: 0.59 3rd Qu.: 4.00
## Max. :45.45 Max. :3205.00 Max. :75.80 Max. :1161.00
## NA's :71025 NA's :71025 NA's :71025 NA's :71025
## lahunv20share lasnap20 lasnap20share TractLOWI
## Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0
## 1st Qu.: 0.00 1st Qu.: 0.0 1st Qu.: 0.00 1st Qu.: 680
## Median : 0.00 Median : 2.0 Median : 0.14 Median : 1164
## Mean : 1.13 Mean : 19.8 Mean : 1.99 Mean : 1385
## 3rd Qu.: 0.00 3rd Qu.: 13.0 3rd Qu.: 1.12 3rd Qu.: 1846
## Max. :67.00 Max. :767.0 Max. :57.32 Max. :12562
## NA's :70920 NA's :71025 NA's :70920 NA's :4
## TractKids TractSeniors TractWhite TractBlack
## Min. : 0 Min. : 0.0 Min. : 0 Min. : 0.0
## 1st Qu.: 611 1st Qu.: 320.0 1st Qu.: 1848 1st Qu.: 43.0
## Median : 924 Median : 497.0 Median : 2914 Median : 160.0
## Mean : 1023 Mean : 555.2 Mean : 3082 Mean : 536.8
## 3rd Qu.: 1312 3rd Qu.: 718.0 3rd Qu.: 4118 3rd Qu.: 610.0
## Max. :11845 Max. :17271.0 Max. :28983 Max. :16804.0
## NA's :4 NA's :4 NA's :4 NA's :4
## TractAsian TractNHOPI TractAIAN TractOMultir
## Min. : 0.0 Min. : 0.000 Min. : 0.00 Min. : 0.0
## 1st Qu.: 17.0 1st Qu.: 0.000 1st Qu.: 7.00 1st Qu.: 85.0
## Median : 58.0 Median : 1.000 Median : 15.00 Median : 186.0
## Mean : 202.3 Mean : 7.446 Mean : 40.15 Mean : 387.7
## 3rd Qu.: 189.0 3rd Qu.: 5.000 3rd Qu.: 33.00 3rd Qu.: 448.0
## Max. :10485.0 Max. :3491.000 Max. :9009.00 Max. :8839.0
## NA's :4 NA's :4 NA's :4 NA's :4
## TractHispanic TractHUNV TractSNAP
## Min. : 0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 88 1st Qu.: 36.0 1st Qu.: 67.0
## Median : 243 Median : 82.0 Median : 152.0
## Mean : 696 Mean : 143.7 Mean : 201.8
## 3rd Qu.: 751 3rd Qu.: 168.5 3rd Qu.: 282.0
## Max. :15420 Max. :6059.0 Max. :2175.0
## NA's :4 NA's :4 NA's :4
One good thing to note is that no share variable exceeds 100, which would be an impossible value. There are, however, quite a lot of 100% maximums, which are a bit suspicious and require further investigation. It could be the case that in some tracts with very low populations a 100% share is possible, but there are a surprising number of them.
# Number of missing values in each column, as a vector named by column.
na_counts <- vapply(data, function(column) sum(is.na(column)), numeric(1))
na_counts
## CensusTract State County
## 0 0 0
## Urban Pop2010 OHU2010
## 0 0 0
## GroupQuartersFlag NUMGQTRS PCTGQTRS
## 0 25 25
## LILATracts_1And10 LILATracts_halfAnd10 LILATracts_1And20
## 0 0 0
## LILATracts_Vehicle HUNVFlag LowIncomeTracts
## 0 0 0
## PovertyRate MedianFamilyIncome LA1and10
## 3 748 0
## LAhalfand10 LA1and20 LATracts_half
## 0 0 0
## LATracts1 LATracts10 LATracts20
## 0 0 0
## LATractsVehicle_20 LAPOP1_10 LAPOP05_10
## 0 29957 14540
## LAPOP1_20 LALOWI1_10 LALOWI05_10
## 35914 29957 14540
## LALOWI1_20 lapophalf lapophalfshare
## 35914 4568 4568
## lalowihalf lalowihalfshare lakidshalf
## 4568 4568 4568
## lakidshalfshare laseniorshalf laseniorshalfshare
## 4568 4568 4568
## lawhitehalf lawhitehalfshare lablackhalf
## 4568 4568 4568
## lablackhalfshare laasianhalf laasianhalfshare
## 4568 4568 4568
## lanhopihalf lanhopihalfshare laaianhalf
## 4568 4568 4568
## laaianhalfshare laomultirhalf laomultirhalfshare
## 4568 4568 4568
## lahisphalf lahisphalfshare lahunvhalf
## 4568 4568 4568
## lahunvhalfshare lasnaphalf lasnaphalfshare
## 4562 4568 4562
## lapop1 lapop1share lalowi1
## 19989 19989 19989
## lalowi1share lakids1 lakids1share
## 19989 19989 19989
## laseniors1 laseniors1share lawhite1
## 19989 19989 19989
## lawhite1share lablack1 lablack1share
## 19989 19989 19989
## laasian1 laasian1share lanhopi1
## 19989 19989 19989
## lanhopi1share laaian1 laaian1share
## 19989 19989 19989
## laomultir1 laomultir1share lahisp1
## 19989 19989 19989
## lahisp1share lahunv1 lahunv1share
## 19989 19989 19966
## lasnap1 lasnap1share lapop10
## 19989 19966 64765
## lapop10share lalowi10 lalowi10share
## 64765 64765 64765
## lakids10 lakids10share laseniors10
## 64765 64765 64765
## laseniors10share lawhite10 lawhite10share
## 64765 64765 64765
## lablack10 lablack10share laasian10
## 64765 64765 64765
## laasian10share lanhopi10 lanhopi10share
## 64765 64765 64765
## laaian10 laaian10share laomultir10
## 64765 64765 64765
## laomultir10share lahisp10 lahisp10share
## 64765 64765 64765
## lahunv10 lahunv10share lasnap10
## 64765 64666 64765
## lasnap10share lapop20 lapop20share
## 64666 71025 71025
## lalowi20 lalowi20share lakids20
## 71025 71025 71025
## lakids20share laseniors20 laseniors20share
## 71025 71025 71025
## lawhite20 lawhite20share lablack20
## 71025 71025 71025
## lablack20share laasian20 laasian20share
## 71025 71025 71025
## lanhopi20 lanhopi20share laaian20
## 71025 71025 71025
## laaian20share laomultir20 laomultir20share
## 71025 71025 71025
## lahisp20 lahisp20share lahunv20
## 71025 71025 71025
## lahunv20share lasnap20 lasnap20share
## 70920 71025 70920
## TractLOWI TractKids TractSeniors
## 4 4 4
## TractWhite TractBlack TractAsian
## 4 4 4
## TractNHOPI TractAIAN TractOMultir
## 4 4 4
## TractHispanic TractHUNV TractSNAP
## 4 4 4
# check rows missing 4 values
# Columns whose NA count is exactly 4.
cols_4NAs <- names(na_counts)[na_counts == 4]
# drop = FALSE keeps a data frame even when only one column matches, so the
# row-wise check below never receives a bare vector.
subset_4NAs <- data[, cols_4NAs, drop = FALSE]
# Keep the rows in which every one of those columns is NA. rowSums() on the
# logical NA mask is the vectorised form of apply(..., 1, all).
rows_missing_4 <- data[rowSums(is.na(subset_4NAs)) == ncol(subset_4NAs), ]
rows_missing_4
## CensusTract State County Urban Pop2010 OHU2010
## 1294 2158000100 Alaska Kusilvak Census Area 0 7459 1745
## 59644 46102940500 South Dakota Oglala Lakota County 0 4419 1036
## 59645 46102940800 South Dakota Oglala Lakota County 0 4745 1052
## 59646 46102940900 South Dakota Oglala Lakota County 0 4422 1056
## GroupQuartersFlag NUMGQTRS PCTGQTRS LILATracts_1And10
## 1294 0 NA NA 0
## 59644 0 NA NA 0
## 59645 0 NA NA 0
## 59646 0 NA NA 0
## LILATracts_halfAnd10 LILATracts_1And20 LILATracts_Vehicle HUNVFlag
## 1294 0 0 1 1
## 59644 0 0 0 0
## 59645 0 0 0 1
## 59646 0 0 0 1
## LowIncomeTracts PovertyRate MedianFamilyIncome LA1and10 LAhalfand10
## 1294 1 40.5 35354 0 0
## 59644 0 NA NA 0 0
## 59645 0 NA NA 0 0
## 59646 0 NA NA 0 0
## LA1and20 LATracts_half LATracts1 LATracts10 LATracts20 LATractsVehicle_20
## 1294 0 0 0 0 0 1
## 59644 0 0 0 0 0 0
## 59645 0 0 0 0 0 1
## 59646 0 0 0 0 0 1
## LAPOP1_10 LAPOP05_10 LAPOP1_20 LALOWI1_10 LALOWI05_10 LALOWI1_20
## 1294 NA NA NA NA NA NA
## 59644 NA NA NA NA NA NA
## 59645 NA NA NA NA NA NA
## 59646 NA NA NA NA NA NA
## lapophalf lapophalfshare lalowihalf lalowihalfshare lakidshalf
## 1294 5855 78.50 4292 57.54 2453
## 59644 3011 68.15 1866 42.24 1188
## 59645 4706 99.17 3892 82.02 1911
## 59646 4337 98.07 3292 74.45 1630
## lakidshalfshare laseniorshalf laseniorshalfshare lawhitehalf
## 1294 32.89 326 4.37 152
## 59644 26.89 177 4.00 55
## 59645 40.28 273 5.74 165
## 59646 36.86 270 6.11 165
## lawhitehalfshare lablackhalf lablackhalfshare laasianhalf
## 1294 2.03 1 0.01 14
## 59644 1.25 0 0.00 1
## 59645 3.48 2 0.04 7
## 59646 3.73 2 0.05 3
## laasianhalfshare lanhopihalf lanhopihalfshare laaianhalf laaianhalfshare
## 1294 0.19 0 0.00 5570 74.67
## 59644 0.01 1 0.02 2924 66.16
## 59645 0.15 1 0.02 4477 94.35
## 59646 0.07 0 0.00 4131 93.43
## laomultirhalf laomultirhalfshare lahisphalf lahisphalfshare lahunvhalf
## 1294 119 1.59 7 0.09 1196
## 59644 31 0.70 53 1.21 98
## 59645 54 1.13 85 1.79 160
## 59646 35 0.80 121 2.73 176
## lahunvhalfshare lasnaphalf lasnaphalfshare lapop1 lapop1share lalowi1
## 1294 68.55 818 46.88 4691 62.89 3472
## 59644 9.44 353 34.04 1207 27.32 742
## 59645 15.20 555 52.76 4620 97.37 3816
## 59646 16.71 600 56.79 3657 82.71 2811
## lalowi1share lakids1 lakids1share laseniors1 laseniors1share lawhite1
## 1294 46.54 1984 26.60 254 3.41 110
## 59644 16.80 462 10.45 81 1.84 32
## 59645 80.42 1875 39.51 270 5.69 165
## 59646 63.57 1381 31.23 242 5.47 152
## lawhite1share lablack1 lablack1share laasian1 laasian1share lanhopi1
## 1294 1.47 1 0.01 7 0.09 0
## 59644 0.73 0 0.00 0 0.00 1
## 59645 3.47 2 0.04 7 0.15 1
## 59646 3.44 1 0.03 1 0.03 0
## lanhopi1share laaian1 laaian1share laomultir1 laomultir1share lahisp1
## 1294 0.00 4482 60.09 91 1.22 3
## 59644 0.02 1158 26.21 16 0.36 31
## 59645 0.02 4392 92.57 53 1.12 85
## 59646 0.00 3474 78.57 28 0.63 99
## lahisp1share lahunv1 lahunv1share lasnap1 lasnap1share lapop10
## 1294 0.05 952 54.57 646 37.01 1562
## 59644 0.70 37 3.62 151 14.59 57
## 59645 1.79 156 14.87 544 51.76 2233
## 59646 2.23 147 13.92 510 48.27 2389
## lapop10share lalowi10 lalowi10share lakids10 lakids10share laseniors10
## 1294 20.95 1193 15.99 653 8.75 70
## 59644 1.28 34 0.76 21 0.47 7
## 59645 47.06 1671 35.23 895 18.86 136
## 59646 54.02 1849 41.81 914 20.68 145
## laseniors10share lawhite10 lawhite10share lablack10 lablack10share
## 1294 0.94 55 0.74 1 0.01
## 59644 0.16 4 0.08 0 0.00
## 59645 2.87 73 1.55 0 0.00
## 59646 3.27 96 2.17 1 0.02
## laasian10 laasian10share lanhopi10 lanhopi10share laaian10 laaian10share
## 1294 5 0.07 0 0 1478 19.81
## 59644 0 0.00 0 0 52 1.18
## 59645 6 0.13 0 0 2126 44.81
## 59646 0 0.00 0 0 2269 51.32
## laomultir10 laomultir10share lahisp10 lahisp10share lahunv10
## 1294 23 0.31 2 0.03 327
## 59644 1 0.02 0 0.00 1
## 59645 27 0.57 45 0.96 68
## 59646 23 0.51 44 1.00 85
## lahunv10share lasnap10 lasnap10share lapop20 lapop20share lalowi20
## 1294 18.73 218 12.48 1324 17.75 1015
## 59644 0.10 8 0.75 NA NA NA
## 59645 6.47 263 25.04 250 5.27 144
## 59646 8.07 321 30.35 20 0.46 13
## lalowi20share lakids20 lakids20share laseniors20 laseniors20share
## 1294 13.61 558 7.48 55 0.73
## 59644 NA NA NA NA NA
## 59645 3.03 100 2.12 22 0.47
## 59646 0.30 5 0.12 3 0.08
## lawhite20 lawhite20share lablack20 lablack20share laasian20
## 1294 38 0.52 0 0 3
## 59644 NA NA NA NA NA
## 59645 16 0.33 0 0 6
## 59646 1 0.03 0 0 0
## laasian20share lanhopi20 lanhopi20share laaian20 laaian20share
## 1294 0.04 0 0 1265 16.97
## 59644 NA NA NA NA NA
## 59645 0.13 0 0 220 4.63
## 59646 0.00 0 0 18 0.40
## laomultir20 laomultir20share lahisp20 lahisp20share lahunv20
## 1294 17 0.23 2 0.03 276
## 59644 NA NA NA NA NA
## 59645 9 0.18 0 0.00 7
## 59646 1 0.02 1 0.01 0
## lahunv20share lasnap20 lasnap20share TractLOWI TractKids TractSeniors
## 1294 16 185 10.59 NA NA NA
## 59644 NA NA NA NA NA NA
## 59645 1 35 3.32 NA NA NA
## 59646 0 3 0.30 NA NA NA
## TractWhite TractBlack TractAsian TractNHOPI TractAIAN TractOMultir
## 1294 NA NA NA NA NA NA
## 59644 NA NA NA NA NA NA
## 59645 NA NA NA NA NA NA
## 59646 NA NA NA NA NA NA
## TractHispanic TractHUNV TractSNAP
## 1294 NA NA NA
## 59644 NA NA NA
## 59645 NA NA NA
## 59646 NA NA NA
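These four rows are also missing NUMGQTRS and PCTGQTRS, and three of them are additionally missing PovertyRate and MedianFamilyIncome. Those three are in the same County (Oglala Lakota County, South Dakota), and they are the only three data points for that County.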
# check rows missing 25 values
# Columns whose NA count is exactly 25.
cols_25NAs <- names(na_counts)[na_counts == 25]
# drop = FALSE keeps a data frame even when only one column matches, so the
# row-wise check below never receives a bare vector.
subset_25NAs <- data[, cols_25NAs, drop = FALSE]
# Keep the rows in which every one of those columns is NA. rowSums() on the
# logical NA mask is the vectorised form of apply(..., 1, all).
rows_missing_25 <- data[rowSums(is.na(subset_25NAs)) == ncol(subset_25NAs), ]
These 25 rows are missing NUMGQTRS and PCTGQTRS, so the rows with four missing values are a subset of these. They are not all from the same State or County, but quite a few are from Madison County in New York.
# check rows missing 4568 values
# Columns whose NA count is exactly 4568.
cols_4568NAs <- names(na_counts)[na_counts == 4568]
# drop = FALSE keeps a data frame even when only one column matches, so the
# row-wise check below never receives a bare vector.
subset_4568NAs <- data[, cols_4568NAs, drop = FALSE]
# Keep the rows in which every one of those columns is NA. rowSums() on the
# logical NA mask is the vectorised form of apply(..., 1, all).
rows_missing_4568 <- data[
  rowSums(is.na(subset_4568NAs)) == ncol(subset_4568NAs),
]
# How the affected tracts split across the Urban flag (0/1).
table(rows_missing_4568$Urban)
##
## 0 1
## 1 4567
These 4,568 rows are all missing LAPOP1_10 and LALOWI1_10, which could be problematic if we wanted to look at those variables. All but one of them are urban tracts, which could affect the results. The other missing variables are all at the 1/2 mile measurements so they are not of concern for the current scope of this project.
# check rows missing 19989 values
# Columns whose NA count is exactly 19989.
cols_19989NAs <- names(na_counts)[na_counts == 19989]
# drop = FALSE keeps a data frame even when only one column matches, so the
# row-wise check below never receives a bare vector.
subset_19989NAs <- data[, cols_19989NAs, drop = FALSE]
# Keep the rows in which every one of those columns is NA. rowSums() on the
# logical NA mask is the vectorised form of apply(..., 1, all).
rows_missing_19989 <- data[
  rowSums(is.na(subset_19989NAs)) == ncol(subset_19989NAs),
]
# How the affected tracts split across the Urban flag (0/1).
table(rows_missing_19989$Urban)
##
## 0 1
## 6 19983
These 19,989 rows are also missing LAPOP1_10 and LALOWI1_10, and they are also almost exclusively urban. In addition, they are missing a lot of our variables of interest.
# check rows missing 64765 values
# Columns whose NA count is exactly 64765.
cols_64765NAs <- names(na_counts)[na_counts == 64765]
# drop = FALSE keeps a data frame even when only one column matches, so the
# row-wise check below never receives a bare vector.
subset_64765NAs <- data[, cols_64765NAs, drop = FALSE]
# Keep the rows in which every one of those columns is NA. rowSums() on the
# logical NA mask is the vectorised form of apply(..., 1, all).
rows_missing_64765 <- data[
  rowSums(is.na(subset_64765NAs)) == ncol(subset_64765NAs),
]
# How the affected tracts split across the Urban flag (0/1).
table(rows_missing_64765$Urban)
##
## 0 1
## 9970 54795
There are 64,765 rows that are missing lalowi10, lalowi10share, lakids10, lakids10share, laseniors10, laseniors10share, lawhite10, lawhite10share, lablack10, lablack10share, laasian10, laasian10share, lanhopi10, lanhopi10share, laaian10. They are also disproportionately urban data points. This is potentially problematic depending on the direction we want to go. Luckily the raw population numbers are not missing, just the low-access population numbers.
Summary Statistics
library(dplyr)
library(tidyr)
library(tibble)
library(knitr)
## Warning: package 'knitr' was built under R version 4.3.3
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Most frequent non-NA value of `x`.
# Ties go to the value that appears first in `x`; NA entries are never
# counted. Returns NA when `x` has no non-NA values.
use_mode <- function(x) {
  candidates <- unique(x)
  candidates <- candidates[!is.na(candidates)]
  # match() maps each element to its candidate index (NA for missing values);
  # tabulate() ignores those NAs when counting.
  counts <- tabulate(match(x, candidates))
  candidates[which.max(counts)]
}
# Urban vs rural summary of lapop1share.
# n_tracts counts every tract in the group, including tracts where
# lapop1share is NA; the other statistics drop NAs.
data %>%
  mutate(Area = ifelse(Urban == 1, "Urban", "Rural")) %>%
  group_by(Area) %>%
  summarise(
    n_tracts = n(),
    mean_p = mean(lapop1share, na.rm = TRUE),
    median_p = median(lapop1share, na.rm = TRUE),
    sd_p = sd(lapop1share, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Round the statistics (not the tract count) to two decimals for display.
  mutate(across(c(mean_p, median_p, sd_p), function(stat) round(stat, 2))) %>%
  kable(caption = "Urban vs Rural – Low-access population ", align = "c") %>%
  kable_styling(full_width = TRUE, position = "center")
| Area | n_tracts | mean_p | median_p | sd_p |
|---|---|---|---|---|
| Rural | 17362 | 84.45 | 96.77 | 21.61 |
| Urban | 55169 | 39.10 | 31.49 | 33.29 |
# Urban vs rural summary of the LowIncomeTracts flag, scaled by 100 so the
# mean reads as a percentage of tracts.
data %>%
  mutate(Area = ifelse(Urban == 1, "Urban", "Rural")) %>%
  group_by(Area) %>%
  summarise(
    n_tracts = n(),
    mean_incm = mean(LowIncomeTracts, na.rm = TRUE) * 100,
    sd_incm = sd(LowIncomeTracts, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  # Round the statistics (not the tract count) to two decimals for display.
  mutate(across(c(mean_incm, sd_incm), function(stat) round(stat, 2))) %>%
  kable(caption = "Urban vs Rural – Low-income tracts (%)", align = "c") %>%
  kable_styling(full_width = TRUE, position = "center")
| Area | n_tracts | mean_incm | sd_incm |
|---|---|---|---|
| Rural | 17362 | 33.46 | 47.19 |
| Urban | 55169 | 44.37 | 49.68 |
# low access - children
# Urban vs rural summary of lakids1share. n_tracts counts every tract in the
# group, including tracts where lakids1share is NA; the other statistics
# drop NAs.
data %>%
  group_by(Area = ifelse(Urban == 1, "Urban", "Rural")) %>%
  summarise(
    n_tracts = n(),
    mean_ch = round(mean(lakids1share, na.rm = TRUE), 2),
    median_ch = round(median(lakids1share, na.rm = TRUE), 2),
    sd_ch = round(sd(lakids1share, na.rm = TRUE), 2),
    max_ch = round(max(lakids1share, na.rm = TRUE), 2),
    .groups = "drop"
  ) %>%
  # align = "c" matches the other summary tables, which all centre their
  # columns.
  kable(caption = "Urban vs Rural Low-access children ", align = "c") %>%
  kable_styling(full_width = TRUE, position = "center")
| Area | n_tracts | mean_ch | median_ch | sd_ch | max_ch |
|---|---|---|---|---|---|
| Rural | 17362 | 19.55 | 20.72 | 6.43 | 77.81 |
| Urban | 55169 | 9.49 | 6.98 | 8.97 | 90.80 |
# Urban vs rural summary of laseniors1share.
# n_tracts counts every tract in the group, including tracts where
# laseniors1share is NA; the other statistics drop NAs.
data %>%
  mutate(Area = ifelse(Urban == 1, "Urban", "Rural")) %>%
  group_by(Area) %>%
  summarise(
    n_tracts = n(),
    mean_snrs = mean(laseniors1share, na.rm = TRUE),
    median_snrs = median(laseniors1share, na.rm = TRUE),
    sd_snrs = sd(laseniors1share, na.rm = TRUE),
    max_snrs = max(laseniors1share, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Round the statistics (not the tract count) to two decimals for display.
  mutate(
    across(
      c(mean_snrs, median_snrs, sd_snrs, max_snrs),
      function(stat) round(stat, 2)
    )
  ) %>%
  kable(caption = "Urban vs Rural – Low-access seniors", align = "c") %>%
  kable_styling(full_width = TRUE, position = "center")
| Area | n_tracts | mean_snrs | median_snrs | sd_snrs | max_snrs |
|---|---|---|---|---|---|
| Rural | 17362 | 12.90 | 12.76 | 5.81 | 100 |
| Urban | 55169 | 5.32 | 3.48 | 6.45 | 100 |
# Urban vs rural summary of PovertyRate, including its most frequent value
# (via use_mode()). n_tracts counts every tract in the group, including
# tracts where PovertyRate is NA.
data %>%
  mutate(Area = ifelse(Urban == 1, "Urban", "Rural")) %>%
  group_by(Area) %>%
  summarise(
    n_tracts = n(),
    mean_pr = mean(PovertyRate, na.rm = TRUE),
    median_pr = median(PovertyRate, na.rm = TRUE),
    sd_pr = sd(PovertyRate, na.rm = TRUE),
    mode_pr = use_mode(PovertyRate),
    .groups = "drop"
  ) %>%
  # Round the statistics (not the tract count) to two decimals for display.
  mutate(
    across(
      c(mean_pr, median_pr, sd_pr, mode_pr),
      function(stat) round(stat, 2)
    )
  ) %>%
  kable(caption = "Urban vs Rural – Poverty rate ", align = "c") %>%
  kable_styling(full_width = TRUE, position = "center")
| Area | n_tracts | mean_pr | median_pr | sd_pr | mode_pr |
|---|---|---|---|---|---|
| Rural | 17362 | 13.20 | 11.5 | 8.44 | 10 |
| Urban | 55169 | 15.81 | 12.2 | 12.76 | 0 |
# Urban vs rural summary of lahunv1share.
# n_tracts counts every tract in the group, including tracts where
# lahunv1share is NA; the other statistics drop NAs.
data %>%
  mutate(Area = ifelse(Urban == 1, "Urban", "Rural")) %>%
  group_by(Area) %>%
  summarise(
    n_tracts = n(),
    mean_h = mean(lahunv1share, na.rm = TRUE),
    median_h = median(lahunv1share, na.rm = TRUE),
    sd_h = sd(lahunv1share, na.rm = TRUE),
    max_h = max(lahunv1share, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # Round the statistics (not the tract count) to two decimals for display.
  mutate(
    across(c(mean_h, median_h, sd_h, max_h), function(stat) round(stat, 2))
  ) %>%
  kable(caption = "Urban vs Rural Households without vehicles ", align = "c") %>%
  kable_styling(full_width = TRUE, position = "center")
| Area | n_tracts | mean_h | median_h | sd_h | max_h |
|---|---|---|---|---|---|
| Rural | 17362 | 3.5 | 2.68 | 3.71 | 78.49 |
| Urban | 55169 | 2.2 | 0.82 | 4.17 | 100.00 |
# Spearman rank correlations between the key variables, computed on
# pairwise-complete observations and rounded to two decimals.
# The display names (trailing spaces included) become the row and column
# labels of the rendered table.
vars <- data[c(
  "PovertyRate",
  "MedianFamilyIncome",
  "lapop1share",
  "lakids1share",
  "laseniors1share",
  "lahunv1share"
)]
names(vars) <- c(
  "Poverty Rate (%)",
  "Median Family Income (USD)",
  "Low-access Population ",
  "Low-access Children ",
  "Low-access Seniors ",
  "No Vehicle Households "
)
cor_matrix <- vars %>%
  cor(use = "pairwise.complete.obs", method = "spearman") %>%
  round(digits = 2)
cor_matrix %>%
  knitr::kable(caption = "Correlation Matrix of Key Variables") %>%
  kable_styling(full_width = FALSE, position = "center")
| Poverty Rate (%) | Median Family Income (USD) | Low-access Population | Low-access Children | Low-access Seniors | No Vehicle Households | |
|---|---|---|---|---|---|---|
| Poverty Rate (%) | 1.00 | -0.83 | -0.08 | -0.08 | -0.07 | 0.30 |
| Median Family Income (USD) | -0.83 | 1.00 | 0.00 | 0.01 | -0.01 | -0.33 |
| Low-access Population | -0.08 | 0.00 | 1.00 | 0.91 | 0.85 | 0.60 |
| Low-access Children | -0.08 | 0.01 | 0.91 | 1.00 | 0.73 | 0.57 |
| Low-access Seniors | -0.07 | -0.01 | 0.85 | 0.73 | 1.00 | 0.62 |
| No Vehicle Households | 0.30 | -0.33 | 0.60 | 0.57 | 0.62 | 1.00 |
Check univariate plots for distributions
library(ggplot2)
library(dplyr)
# Coerce the columns used in the univariate plots to numeric and derive Rural.
data <- data %>%
  mutate(
    LAPOP1_10 = as.numeric(LAPOP1_10),
    Urban = as.numeric(Urban),
    Pop2010 = as.numeric(Pop2010),
    # Urban is a 0/1 tract flag, not a head count, so Pop2010 - Urban is not a
    # rural population. Rural holds the 2010 population of rural tracts
    # (Urban == 0) and is NA for urban tracts so they do not distort its
    # distribution.
    Rural = ifelse(Urban == 0, Pop2010, NA_real_),
    TractKids = as.numeric(TractKids), # under 18
    TractSeniors = as.numeric(TractSeniors) # 65+
  )
### Distribution of LAPOP1_10 (response variable)
# Histogram with a density curve overlaid on the count scale.
ggplot(data, aes(x = LAPOP1_10)) +
  geom_histogram(bins = 30, fill = "steelblue", color = "white", alpha = 0.7) +
  # after_stat(count) replaces the `..count..` notation, which ggplot2
  # deprecated in 3.4.0.
  geom_density(aes(y = after_stat(count)), color = "red", linewidth = 1) +
  theme_minimal() +
  labs(title = "Distribution of LAPOP1_10", x = "LAPOP1_10", y = "Count")
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 29957 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 29957 rows containing non-finite outside the scale range
## (`stat_density()`).
# Number of tracts at each value of the Urban flag. The bars count tracts,
# not people, so the labels describe tracts rather than population.
ggplot(data, aes(x = factor(Urban))) +
  geom_bar(fill = "orange", alpha = 0.7) +
  theme_minimal() +
  labs(
    title = "Distribution of Tracts by Urban Flag",
    x = "Urban (0/1)",
    y = "Count"
  )
### Distribution of Rural population
# Urban is a 0/1 tract flag, so the rural population is taken as Pop2010 of
# the tracts with Urban == 0. Plotting it directly avoids relying on a
# derived column that subtracts the flag from the population.
ggplot(subset(data, Urban == 0), aes(x = Pop2010)) +
  geom_histogram(bins = 30, fill = "sienna", color = "white", alpha = 0.7) +
  theme_minimal() +
  labs(
    title = "Distribution of Rural Population",
    x = "Pop2010 (rural tracts)",
    y = "Count"
  )
### Distribution of Age groups (Kids and Seniors)
# 30-bin histogram of TractKids.
ggplot(data = data) +
  theme_minimal() +
  labs(title = "Distribution of Children (<18)", x = "TractKids", y = "Count") +
  geom_histogram(
    mapping = aes(x = TractKids),
    bins = 30,
    fill = "purple",
    color = "white",
    alpha = 0.7
  )
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
# 30-bin histogram of TractSeniors.
ggplot(data = data) +
  theme_minimal() +
  labs(title = "Distribution of Seniors (65+)", x = "TractSeniors", y = "Count") +
  geom_histogram(
    mapping = aes(x = TractSeniors),
    bins = 30,
    fill = "orange",
    color = "white",
    alpha = 0.7
  )
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Single boxplot of LAPOP1_10 across all tracts.
ggplot(data = data) +
  theme_minimal() +
  labs(title = "Boxplot of LAPOP1_10", y = "LAPOP1_10") +
  geom_boxplot(mapping = aes(y = LAPOP1_10), fill = "skyblue")
## Warning: Removed 29957 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Number of tracts at each value of the LowIncomeTracts flag.
ggplot(data = data) +
  theme_minimal() +
  labs(
    title = "Distribution of Low Income Tracts",
    x = "LowIncomeTracts (0/1)",
    y = "Count"
  ) +
  geom_bar(
    mapping = aes(x = factor(LowIncomeTracts)),
    fill = "orange",
    alpha = 0.7
  )
Plots
# Age groups (children, seniors), urban vs rural
# Children: boxplot of lakids1share for urban and rural tracts.
ggplot(data = data) +
  labs(
    title = "Children with Low Access Urban vs Rural",
    x = "Area",
    y = "children with low access (%)"
  ) +
  geom_boxplot(
    mapping = aes(
      x = ifelse(Urban == 1, "Urban", "Rural"),
      y = lakids1share
    ),
    fill = "brown"
  )
## Warning: Removed 19989 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# seniors
# Boxplot of laseniors1share for urban and rural tracts.
ggplot(data, aes(x = ifelse(Urban == 1, "Urban", "Rural"),
                 y = laseniors1share)) +
  geom_boxplot(fill = "navy") +
  # Title no longer carries the stray closing parenthesis.
  labs(title = "Seniors with Low Access Urban vs Rural",
       x = "Area", y = "seniors with low access (%)")
## Warning: Removed 19989 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# vehicle access urban vs rural
# Boxplot of lahunv1share for urban and rural tracts. The summary table and
# correlation matrix label this variable as households WITHOUT vehicles, so
# the plot uses the same wording instead of "Vehicle Access", which reads as
# the opposite.
ggplot(data, aes(x = ifelse(Urban == 1, "Urban", "Rural"),
                 y = lahunv1share)) +
  geom_boxplot(fill = "blue") +
  labs(title = "Households without Vehicles in Urban vs Rural",
       x = "Area", y = "Households without vehicles (%)")
## Warning: Removed 19966 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
#average Low-Access by State
# Mean lapop1share per State and Area (Urban/Rural). `region` is the
# lower-cased state name, used as the key for joining to the map polygons.
avrge_by_state <- data %>%
  mutate(Area = ifelse(Urban == 1, "Urban", "Rural")) %>%
  group_by(State, Area) %>%
  summarise(lowaccess = mean(lapop1share, na.rm = TRUE), .groups = "drop") %>%
  mutate(region = tolower(State))
us_map <- map_data("state")
# Keep only the states that have polygons in the map data.
avrge_by_state <- avrge_by_state %>% filter(region %in% unique(us_map$region))
# Each region has many polygon vertices in us_map and up to two Area rows in
# avrge_by_state, so the join fans out on both sides by design. Declaring the
# relationship documents that and removes dplyr's many-to-many warning.
plot_df <- left_join(
  us_map,
  avrge_by_state,
  by = "region",
  relationship = "many-to-many"
)
## Warning in left_join(us_map, avrge_by_state, by = "region"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 1 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
# State map filled by mean low-access share, one panel per Area.
ggplot(
  data = plot_df,
  mapping = aes(x = long, y = lat, group = group, fill = lowaccess)
) +
  facet_wrap(~ Area) +
  coord_fixed(1.3) +
  scale_fill_gradient(
    name = "Low access",
    low = "lightyellow",
    high = "darkred"
  ) +
  theme_void() +
  labs(title = "Average Low-Access by State Urban vs Rural") +
  geom_polygon(color = "white", linewidth = 0.3)
# group quarters urban vs rural
# Boxplot of PCTGQTRS for urban and rural tracts. PCTGQTRS is a group-quarters
# percentage, not a low-access measure, so the title does not mention low
# access.
ggplot(data, aes(x = ifelse(Urban == 1, "Urban", "Rural"),
                 y = PCTGQTRS)) +
  geom_boxplot(fill = "purple") +
  labs(title = "Group Quarters Share in Urban vs Rural",
       x = "Area", y = "Group Quarters (%)")
## Warning: Removed 25 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
# Median family income against lapop1share, one panel per Urban/Rural area.
ggplot(data = data) +
  facet_wrap(~ ifelse(Urban == 1, "Urban", "Rural")) +
  theme_minimal() +
  labs(
    title = " Median Income in low Access urban vs rural areas ",
    x = "Median Family Income (USD)",
    y = "Low Access(%)"
  ) +
  geom_point(
    mapping = aes(x = MedianFamilyIncome, y = lapop1share),
    alpha = 0.4,
    color = "orange"
  )
## Warning: Removed 20484 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Low income, urban vs rural: boxplot of lalowi1share for each area.
ggplot(data = data) +
  labs(
    title = "Low Income and Low Access in Urban vs Rural",
    x = "Area",
    y = "Low Income (%)"
  ) +
  geom_boxplot(
    mapping = aes(
      x = ifelse(Urban == 1, "Urban", "Rural"),
      y = lalowi1share
    ),
    fill = "pink"
  )
## Warning: Removed 19989 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
Regression
# Poverty rate against lapop1share with a fitted linear trend line, one panel
# per Urban/Rural area.
ggplot(
  data = data,
  mapping = aes(x = PovertyRate, y = lapop1share)
) +
  facet_wrap(~ ifelse(Urban == 1, "Urban", "Rural")) +
  theme_minimal() +
  labs(
    title = "Poverty Rate vs Low Access Population by Area ",
    x = "Poverty Rate (%)",
    y = "% Low-Access Population"
  ) +
  # Points first, then the line, so the trend is drawn on top.
  geom_point(alpha = 0.4, color = "steelblue") +
  geom_smooth(method = "lm", se = FALSE, color = "red")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 19992 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 19992 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Median Family Income Regression
# Median family income against lapop1share with a fitted linear trend line,
# one panel per Urban/Rural area.
ggplot(data, aes(x = MedianFamilyIncome, y = lapop1share)) +
  geom_point(alpha = 0.4, color = "steelblue") +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  facet_wrap(~ ifelse(Urban == 1, "Urban", "Rural")) +
  # Income is a dollar amount, so the axis unit is USD rather than "%",
  # matching the other income plot and the correlation table.
  labs(title = "Median Family Income vs Low Access Population by Area ",
       x = "Median Family Income (USD)",
       y = "% Low-Access Population") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 20484 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 20484 rows containing missing values or values outside the scale range
## (`geom_point()`).